{ "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "#%%\n", "\"\"\"File 05normalMixture_allAlgos.py\n", "\n", ":author: Michel Bierlaire, EPFL\n", ":date: Fri May 1 11:59:20 2020\n", "\n", " Example of a mixture of logit models, using Monte-Carlo integration.\n", " Three alternatives: Train, Car and Swissmetro\n", " SP data\n", "\"\"\"\n", "\n", "import pandas as pd\n", "import biogeme.optimization as opt\n", "import biogeme.database as db\n", "import biogeme.biogeme as bio\n", "import biogeme.models as models\n", "import biogeme.messaging as msg\n", "from biogeme.expressions import Beta, DefineVariable, bioDraws, log, MonteCarlo\n", "\n", "# Read the data\n", "df = pd.read_csv('swissmetro.dat', '\\t')\n", "database = db.Database('swissmetro', df)\n", "\n", "# The Pandas data structure is available as database.data. Use all the\n", "# Pandas functions to investigate the database. For example:\n", "#print(database.data.describe())\n", "\n", "# The following statement allows you to use the names of the variable\n", "# as Python variable.\n", "globals().update(database.variables)\n", "\n", "# Removing some observations can be done directly using pandas.\n", "#remove = (((database.data.PURPOSE != 1) &\n", "# (database.data.PURPOSE != 3)) |\n", "# (database.data.CHOICE == 0))\n", "#database.data.drop(database.data[remove].index,inplace=True)\n", "\n", "# Here we use the \"biogeme\" way for backward compatibility\n", "exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0\n", "database.remove(exclude)\n", "\n", "# Parameters to be estimated\n", "ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)\n", "ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)\n", "ASC_SM = Beta('ASC_SM', 0, None, None, 1)\n", "B_COST = Beta('B_COST', 0, None, None, 0)\n", "\n", "# Define a random parameter, normally distributed, designed to be used\n", "# for Monte-Carlo simulation\n", "B_TIME = Beta('B_TIME', 0, None, None, 0)\n", "\n", "# It is advised not to use 0 as starting value for the following parameter.\n", "B_TIME_S = Beta('B_TIME_S', 1, None, None, 0)\n", "B_TIME_RND = B_TIME + B_TIME_S * bioDraws('B_TIME_RND', 'NORMAL')\n", "\n", "# Definition of new variables\n", "SM_COST = SM_CO * (GA == 0)\n", "TRAIN_COST = TRAIN_CO * (GA == 0)\n", "\n", "# Definition of new variables: adding columns to the database\n", "CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)\n", "TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)\n", "TRAIN_TT_SCALED = DefineVariable('TRAIN_TT_SCALED', TRAIN_TT / 100.0, database)\n", "TRAIN_COST_SCALED = DefineVariable('TRAIN_COST_SCALED', TRAIN_COST / 100, database)\n", "SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database)\n", "SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database)\n", "CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)\n", "CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)\n", "\n", "# Definition of the utility functions\n", "V1 = ASC_TRAIN + \\\n", " B_TIME_RND * TRAIN_TT_SCALED + \\\n", " B_COST * TRAIN_COST_SCALED\n", "V2 = ASC_SM + \\\n", " B_TIME_RND * SM_TT_SCALED + \\\n", " B_COST * SM_COST_SCALED\n", "V3 = ASC_CAR + \\\n", " B_TIME_RND * CAR_TT_SCALED + \\\n", " B_COST * CAR_CO_SCALED\n", "\n", "# Associate utility functions with the numbering of alternatives\n", "V = {1: V1,\n", " 2: V2,\n", " 3: V3}\n", "\n", "# Associate the availability conditions with the alternatives\n", "av = {1: TRAIN_AV_SP,\n", " 2: SM_AV,\n", " 3: CAR_AV_SP}\n", "\n", "# Conditional to B_TIME_RND, we have a logit model (called the kernel)\n", "prob = models.logit(V, av, CHOICE)\n", "\n", "# We integrate over B_TIME_RND using Monte-Carlo\n", "logprob = log(MonteCarlo(prob))\n", "\n", "# Define level of verbosity\n", "logger = msg.bioMessage()\n", "#logger.setSilent()\n", "#logger.setWarning()\n", "logger.setGeneral()\n", "#logger.setDetailed()\n", "\n", "# Create the Biogeme object\n", "biogeme = bio.BIOGEME(database, logprob, numberOfDraws=100000)\n", "\n", "algos = {'CFSQP ': None,\n", " 'scipy ': opt.scipy,\n", " 'Line search ': opt.newtonLineSearchForBiogeme,\n", " 'Trust region (dogleg) ': opt.newtonTrustRegionForBiogeme,\n", " 'Trust region (cg) ': opt.newtonTrustRegionForBiogeme,\n", " 'LS-BFGS ': opt.bfgsLineSearchForBiogeme,\n", " 'TR-BFGS ': opt.bfgsTrustRegionForBiogeme,\n", " 'Simple bounds Newton fCG': opt.simpleBoundsNewtonAlgorithmForBiogeme,\n", " 'Simple bounds BFGS fCG ': opt.simpleBoundsNewtonAlgorithmForBiogeme,\n", " 'Simple bounds hybrid fCG': opt.simpleBoundsNewtonAlgorithmForBiogeme,\n", " 'Simple bounds Newton iCG': opt.simpleBoundsNewtonAlgorithmForBiogeme,\n", " 'Simple bounds BFGS iCG ': opt.simpleBoundsNewtonAlgorithmForBiogeme,\n", " 'Simple bounds hybrid iCG': opt.simpleBoundsNewtonAlgorithmForBiogeme,\n", "}\n", "\n", "algoParameters = {'Trust region (dogleg) ': {'dogleg':True},\n", " 'Trust region (cg) ': {'dogleg':False},\n", " 'Simple bounds Newton fCG': {'proportionAnalyticalHessian': 1.0,\n", " 'infeasibleConjugateGradient': False},\n", " 'Simple bounds BFGS fCG ': {'proportionAnalyticalHessian': 0.0,\n", " 'infeasibleConjugateGradient': False},\n", " 'Simple bounds hybrid fCG': {'proportionAnalyticalHessian': 0.5,\n", " 'infeasibleConjugateGradient': False},\n", " 'Simple bounds Newton iCG': {'proportionAnalyticalHessian': 1.0,\n", " 'infeasibleConjugateGradient': True},\n", " 'Simple bounds BFGS iCG ': {'proportionAnalyticalHessian': 0.0,\n", " 'infeasibleConjugateGradient': True},\n", " 'Simple bounds hybrid iCG': {'proportionAnalyticalHessian': 0.5,\n", " 'infeasibleConjugateGradient': True}}\n", "\n", "results = {}\n", "msg = ''\n", "for name, algo in algos.items():\n", " biogeme.modelName = f'05normalMixture_allAlgos_{name}'.strip()\n", " p = algoParameters.get(name)\n", " results[name] = biogeme.estimate(algorithm=algo, algoParameters=p)\n", " g = results[name].data.g\n", " msg += (f'{name}\\t{results[name].data.logLike:.2f}\\t'\n", " f'{results[name].data.gradientNorm:.2g}\\t'\n", " f'{results[name].data.optimizationMessages[\"Optimization time\"]}'\n", " f'\\t{results[name].data.optimizationMessages[\"Cause of termination\"]}\\n')\n", "\n", "print('Algorithm\\t\\tloglike\\t\\tnormg\\ttime\\t\\tdiagnostic')\n", "print('+++++++++\\t\\t+++++++\\t\\t+++++\\t++++\\t\\t++++++++++')\n", "print(msg)\n", "\n", "\"\"\"\n", "Here are the results. Note that the draws are identical for all runs. Still, the algorithms\n", "may converge to different solutions. Some algorithms obtain a solution with\n", "B_TIME_S = 1.65 (LL = -5215.588),\n", "and some obtain a solution with\n", "B_TIME_S = -1.65 (LL = -5215.204).\n", "Both are local optima of the likelihood function. As the draws are not exactly symmetric,\n", "these solutions have different values for the objective functions. If the number of draws is\n", "increased, the two local solutions will (asymptotically) become identical.\n", "\n", "Algorithm loglike normg time diagnostic\n", "+++++++++ +++++++ +++++ ++++ ++++++++++\n", "CFSQP -5215.59 0.00037 0:01:32.434504 b'Normal termination. Obj: 6.05545e-06 Const: 6.05545e-06'\n", "scipy -5215.59 0.00024 0:00:26.443081 b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'\n", "Line search -5215.59 3.8e-06 0:01:39.359296 Relative gradient = 9e-10 <= 6.1e-06\n", "Trust region (dogleg) -5215.20 0.0066 0:00:27.050893 Relative gradient = 1.5e-06 <= 6.1e-06\n", "Trust region (cg) -5215.20 0.013 0:00:33.720330 Relative gradient = 2.9e-06 <= 6.1e-06\n", "LS-BFGS -5215.59 0.017 0:01:33.475099 Relative gradient = 2.9e-06 <= 6.1e-06\n", "TR-BFGS -5215.59 0.018 0:01:22.466036 Relative gradient = 3.1e-06 <= 6.1e-06\n", "Simple bounds Newton fCG -5215.59 0.026 0:00:30.640365 Relative gradient = 5.1e-06 <= 6.1e-06\n", "Simple bounds BFGS fCG -5215.59 0.027 0:01:28.154424 Relative gradient = 4.7e-06 <= 6.1e-06\n", "Simple bounds hybrid fCG -5215.59 0.031 0:00:37.804337 Relative gradient = 3.9e-06 <= 6.1e-06\n", "Simple bounds Newton iCG -5215.20 0.017 0:00:26.515896 Relative gradient = 4e-06 <= 6.1e-06\n", "Simple bounds BFGS iCG -5215.59 0.027 0:01:28.327714 Relative gradient = 4.7e-06 <= 6.1e-06\n", "Simple bounds hybrid iCG -5215.20 0.0044 0:00:32.788981 Relative gradient = 8.9e-07 <= 6.1e-06\n", "\"\"\"\n" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 }