{ "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "#%%\n", "\"\"\"File 06serialCorrelation.py\n", "\n", "Choice model with the latent variable.\n", "Mixture of logit, with agent effect to deal with serial correlation.\n", "Measurement equation for the indicators.\n", "Maximum likelihood (full information) estimation.\n", "\n", ":author: Michel Bierlaire, EPFL\n", ":date: Wed Sep 11 08:27:18 2019\n", "\n", "\"\"\"\n", "\n", "import sys\n", "import pandas as pd\n", "import biogeme.database as db\n", "import biogeme.biogeme as bio\n", "from biogeme import models\n", "import biogeme.results as res\n", "import biogeme.messaging as msg\n", "import biogeme.optimization as opt\n", "from biogeme.expressions import (\n", " Beta,\n", " DefineVariable,\n", " bioDraws,\n", " MonteCarlo,\n", " Elem,\n", " bioNormalCdf,\n", " exp,\n", " log,\n", ")\n", "\n", "# Read the data\n", "df = pd.read_csv('optima.dat', sep='\\t')\n", "database = db.Database('optima', df)\n", "\n", "# The following statement allows you to use the names of the variable\n", "# as Python variable.\n", "globals().update(database.variables)\n", "\n", "# Exclude observations such that the chosen alternative is -1\n", "database.remove(Choice == -1.0)\n", "\n", "\n", "# Read the estimates from the previous estimation, and use\n", "# them as starting values\n", "try:\n", " results = res.bioResults(pickleFile='05latentChoiceFull.pickle')\n", "except FileNotFoundError:\n", " print(\n", " 'Run first the script 05latentChoiceFull.py in order to generate the '\n", " 'file 05latentChoiceFull.pickle.'\n", " )\n", " sys.exit()\n", "\n", "betas = results.getBetaValues()\n", "\n", "### Variables\n", "\n", "# Piecewise linear definition of income\n", "ScaledIncome = DefineVariable(\n", " 'ScaledIncome', CalculatedIncome / 1000, database\n", ")\n", "thresholds = [None, 4, 6, 8, 10, None]\n", "formulaIncome = models.piecewiseFormula(\n", " ScaledIncome,\n", " thresholds,\n", " [\n", " betas['beta_ScaledIncome_lessthan_4'],\n", " betas['beta_ScaledIncome_4_6'],\n", " betas['beta_ScaledIncome_6_8'],\n", " betas['beta_ScaledIncome_8_10'],\n", " betas['beta_ScaledIncome_10_more'],\n", " ],\n", ")\n", "\n", "# Definition of other variables\n", "age_65_more = DefineVariable('age_65_more', age >= 65, database)\n", "moreThanOneCar = DefineVariable('moreThanOneCar', NbCar > 1, database)\n", "moreThanOneBike = DefineVariable('moreThanOneBike', NbBicy > 1, database)\n", "individualHouse = DefineVariable('individualHouse', HouseType == 1, database)\n", "male = DefineVariable('male', Gender == 1, database)\n", "haveChildren = DefineVariable(\n", " 'haveChildren', ((FamilSitu == 3) + (FamilSitu == 4)) > 0, database\n", ")\n", "haveGA = DefineVariable('haveGA', GenAbST == 1, database)\n", "highEducation = DefineVariable('highEducation', Education >= 6, database)\n", "\n", "\n", "### Coefficients\n", "\n", "coef_intercept = Beta('coef_intercept', betas['coef_intercept'], None, None, 0)\n", "coef_age_65_more = Beta(\n", " 'coef_age_65_more', betas['coef_age_65_more'], None, None, 0\n", ")\n", "coef_haveGA = Beta('coef_haveGA', betas['coef_haveGA'], None, None, 0)\n", "\n", "coef_moreThanOneCar = Beta(\n", " 'coef_moreThanOneCar', betas['coef_moreThanOneCar'], None, None, 0\n", ")\n", "coef_moreThanOneBike = Beta(\n", " 'coef_moreThanOneBike', betas['coef_moreThanOneBike'], None, None, 0\n", ")\n", "coef_individualHouse = Beta(\n", " 'coef_individualHouse', betas['coef_individualHouse'], None, None, 0\n", ")\n", "coef_male = Beta('coef_male', betas['coef_male'], None, None, 0)\n", "coef_haveChildren = Beta(\n", " 'coef_haveChildren', betas['coef_haveChildren'], None, None, 0\n", ")\n", "coef_highEducation = Beta(\n", " 'coef_highEducation', betas['coef_highEducation'], None, None, 0\n", ")\n", "\n", "\n", "### Latent variable: structural equation\n", "\n", "# Define a random parameter, normally distributed, designed to be used\n", "# for Monte-Carlo integration\n", "omega = bioDraws('omega', 'NORMAL')\n", "sigma_s = Beta('sigma_s', betas['sigma_s'], None, None, 0)\n", "\n", "#\n", "# Deal with serial correlation by including an error component\n", "# that is individual specific\n", "errorComponent = bioDraws('errorComponent', 'NORMAL')\n", "ec_sigma = Beta('ec_sigma', 1, None, None, 0)\n", "\n", "CARLOVERS = (\n", " coef_intercept\n", " + coef_age_65_more * age_65_more\n", " + formulaIncome\n", " + coef_moreThanOneCar * moreThanOneCar\n", " + coef_moreThanOneBike * moreThanOneBike\n", " + coef_individualHouse * individualHouse\n", " + coef_male * male\n", " + coef_haveChildren * haveChildren\n", " + coef_haveGA * haveGA\n", " + coef_highEducation * highEducation\n", " + sigma_s * omega\n", " + ec_sigma * errorComponent\n", ")\n", "\n", "### Measurement equations\n", "\n", "INTER_Envir01 = Beta('INTER_Envir01', 0, None, None, 1)\n", "INTER_Envir02 = Beta('INTER_Envir02', betas['INTER_Envir02'], None, None, 0)\n", "INTER_Envir03 = Beta('INTER_Envir03', betas['INTER_Envir03'], None, None, 0)\n", "INTER_Mobil11 = Beta('INTER_Mobil11', betas['INTER_Mobil11'], None, None, 0)\n", "INTER_Mobil14 = Beta('INTER_Mobil14', betas['INTER_Mobil14'], None, None, 0)\n", "INTER_Mobil16 = Beta('INTER_Mobil16', betas['INTER_Mobil16'], None, None, 0)\n", "INTER_Mobil17 = Beta('INTER_Mobil17', betas['INTER_Mobil17'], None, None, 0)\n", "\n", "B_Envir01_F1 = Beta('B_Envir01_F1', -1, None, None, 1)\n", "B_Envir02_F1 = Beta('B_Envir02_F1', betas['B_Envir02_F1'], None, None, 0)\n", "B_Envir03_F1 = Beta('B_Envir03_F1', betas['B_Envir03_F1'], None, None, 0)\n", "B_Mobil11_F1 = Beta('B_Mobil11_F1', betas['B_Mobil11_F1'], None, None, 0)\n", "B_Mobil14_F1 = Beta('B_Mobil14_F1', betas['B_Mobil14_F1'], None, None, 0)\n", "B_Mobil16_F1 = Beta('B_Mobil16_F1', betas['B_Mobil16_F1'], None, None, 0)\n", "B_Mobil17_F1 = Beta('B_Mobil17_F1', betas['B_Mobil17_F1'], None, None, 0)\n", "\n", "MODEL_Envir01 = INTER_Envir01 + B_Envir01_F1 * CARLOVERS\n", "MODEL_Envir02 = INTER_Envir02 + B_Envir02_F1 * CARLOVERS\n", "MODEL_Envir03 = INTER_Envir03 + B_Envir03_F1 * CARLOVERS\n", "MODEL_Mobil11 = INTER_Mobil11 + B_Mobil11_F1 * CARLOVERS\n", "MODEL_Mobil14 = INTER_Mobil14 + B_Mobil14_F1 * CARLOVERS\n", "MODEL_Mobil16 = INTER_Mobil16 + B_Mobil16_F1 * CARLOVERS\n", "MODEL_Mobil17 = INTER_Mobil17 + B_Mobil17_F1 * CARLOVERS\n", "\n", "SIGMA_STAR_Envir01 = Beta('SIGMA_STAR_Envir01', 1, None, None, 1)\n", "SIGMA_STAR_Envir02 = Beta(\n", " 'SIGMA_STAR_Envir02', betas['SIGMA_STAR_Envir02'], None, None, 0\n", ")\n", "SIGMA_STAR_Envir03 = Beta(\n", " 'SIGMA_STAR_Envir03', betas['SIGMA_STAR_Envir03'], None, None, 0\n", ")\n", "SIGMA_STAR_Mobil11 = Beta(\n", " 'SIGMA_STAR_Mobil11', betas['SIGMA_STAR_Mobil11'], None, None, 0\n", ")\n", "SIGMA_STAR_Mobil14 = Beta(\n", " 'SIGMA_STAR_Mobil14', betas['SIGMA_STAR_Mobil14'], None, None, 0\n", ")\n", "SIGMA_STAR_Mobil16 = Beta(\n", " 'SIGMA_STAR_Mobil16', betas['SIGMA_STAR_Mobil16'], None, None, 0\n", ")\n", "SIGMA_STAR_Mobil17 = Beta(\n", " 'SIGMA_STAR_Mobil17', betas['SIGMA_STAR_Mobil17'], None, None, 0\n", ")\n", "\n", "delta_1 = Beta('delta_1', betas['delta_1'], 0, 10, 0)\n", "delta_2 = Beta('delta_2', betas['delta_2'], 0, 10, 0)\n", "tau_1 = -delta_1 - delta_2\n", "tau_2 = -delta_1\n", "tau_3 = delta_1\n", "tau_4 = delta_1 + delta_2\n", "\n", "Envir01_tau_1 = (tau_1 - MODEL_Envir01) / SIGMA_STAR_Envir01\n", "Envir01_tau_2 = (tau_2 - MODEL_Envir01) / SIGMA_STAR_Envir01\n", "Envir01_tau_3 = (tau_3 - MODEL_Envir01) / SIGMA_STAR_Envir01\n", "Envir01_tau_4 = (tau_4 - MODEL_Envir01) / SIGMA_STAR_Envir01\n", "IndEnvir01 = {\n", " 1: bioNormalCdf(Envir01_tau_1),\n", " 2: bioNormalCdf(Envir01_tau_2) - bioNormalCdf(Envir01_tau_1),\n", " 3: bioNormalCdf(Envir01_tau_3) - bioNormalCdf(Envir01_tau_2),\n", " 4: bioNormalCdf(Envir01_tau_4) - bioNormalCdf(Envir01_tau_3),\n", " 5: 1 - bioNormalCdf(Envir01_tau_4),\n", " 6: 1.0,\n", " -1: 1.0,\n", " -2: 1.0,\n", "}\n", "\n", "P_Envir01 = Elem(IndEnvir01, Envir01)\n", "\n", "\n", "Envir02_tau_1 = (tau_1 - MODEL_Envir02) / SIGMA_STAR_Envir02\n", "Envir02_tau_2 = (tau_2 - MODEL_Envir02) / SIGMA_STAR_Envir02\n", "Envir02_tau_3 = (tau_3 - MODEL_Envir02) / SIGMA_STAR_Envir02\n", "Envir02_tau_4 = (tau_4 - MODEL_Envir02) / SIGMA_STAR_Envir02\n", "IndEnvir02 = {\n", " 1: bioNormalCdf(Envir02_tau_1),\n", " 2: bioNormalCdf(Envir02_tau_2) - bioNormalCdf(Envir02_tau_1),\n", " 3: bioNormalCdf(Envir02_tau_3) - bioNormalCdf(Envir02_tau_2),\n", " 4: bioNormalCdf(Envir02_tau_4) - bioNormalCdf(Envir02_tau_3),\n", " 5: 1 - bioNormalCdf(Envir02_tau_4),\n", " 6: 1.0,\n", " -1: 1.0,\n", " -2: 1.0,\n", "}\n", "\n", "P_Envir02 = Elem(IndEnvir02, Envir02)\n", "\n", "Envir03_tau_1 = (tau_1 - MODEL_Envir03) / SIGMA_STAR_Envir03\n", "Envir03_tau_2 = (tau_2 - MODEL_Envir03) / SIGMA_STAR_Envir03\n", "Envir03_tau_3 = (tau_3 - MODEL_Envir03) / SIGMA_STAR_Envir03\n", "Envir03_tau_4 = (tau_4 - MODEL_Envir03) / SIGMA_STAR_Envir03\n", "IndEnvir03 = {\n", " 1: bioNormalCdf(Envir03_tau_1),\n", " 2: bioNormalCdf(Envir03_tau_2) - bioNormalCdf(Envir03_tau_1),\n", " 3: bioNormalCdf(Envir03_tau_3) - bioNormalCdf(Envir03_tau_2),\n", " 4: bioNormalCdf(Envir03_tau_4) - bioNormalCdf(Envir03_tau_3),\n", " 5: 1 - bioNormalCdf(Envir03_tau_4),\n", " 6: 1.0,\n", " -1: 1.0,\n", " -2: 1.0,\n", "}\n", "\n", "P_Envir03 = Elem(IndEnvir03, Envir03)\n", "\n", "Mobil11_tau_1 = (tau_1 - MODEL_Mobil11) / SIGMA_STAR_Mobil11\n", "Mobil11_tau_2 = (tau_2 - MODEL_Mobil11) / SIGMA_STAR_Mobil11\n", "Mobil11_tau_3 = (tau_3 - MODEL_Mobil11) / SIGMA_STAR_Mobil11\n", "Mobil11_tau_4 = (tau_4 - MODEL_Mobil11) / SIGMA_STAR_Mobil11\n", "IndMobil11 = {\n", " 1: bioNormalCdf(Mobil11_tau_1),\n", " 2: bioNormalCdf(Mobil11_tau_2) - bioNormalCdf(Mobil11_tau_1),\n", " 3: bioNormalCdf(Mobil11_tau_3) - bioNormalCdf(Mobil11_tau_2),\n", " 4: bioNormalCdf(Mobil11_tau_4) - bioNormalCdf(Mobil11_tau_3),\n", " 5: 1 - bioNormalCdf(Mobil11_tau_4),\n", " 6: 1.0,\n", " -1: 1.0,\n", " -2: 1.0,\n", "}\n", "\n", "P_Mobil11 = Elem(IndMobil11, Mobil11)\n", "\n", "Mobil14_tau_1 = (tau_1 - MODEL_Mobil14) / SIGMA_STAR_Mobil14\n", "Mobil14_tau_2 = (tau_2 - MODEL_Mobil14) / SIGMA_STAR_Mobil14\n", "Mobil14_tau_3 = (tau_3 - MODEL_Mobil14) / SIGMA_STAR_Mobil14\n", "Mobil14_tau_4 = (tau_4 - MODEL_Mobil14) / SIGMA_STAR_Mobil14\n", "IndMobil14 = {\n", " 1: bioNormalCdf(Mobil14_tau_1),\n", " 2: bioNormalCdf(Mobil14_tau_2) - bioNormalCdf(Mobil14_tau_1),\n", " 3: bioNormalCdf(Mobil14_tau_3) - bioNormalCdf(Mobil14_tau_2),\n", " 4: bioNormalCdf(Mobil14_tau_4) - bioNormalCdf(Mobil14_tau_3),\n", " 5: 1 - bioNormalCdf(Mobil14_tau_4),\n", " 6: 1.0,\n", " -1: 1.0,\n", " -2: 1.0,\n", "}\n", "\n", "P_Mobil14 = Elem(IndMobil14, Mobil14)\n", "\n", "Mobil16_tau_1 = (tau_1 - MODEL_Mobil16) / SIGMA_STAR_Mobil16\n", "Mobil16_tau_2 = (tau_2 - MODEL_Mobil16) / SIGMA_STAR_Mobil16\n", "Mobil16_tau_3 = (tau_3 - MODEL_Mobil16) / SIGMA_STAR_Mobil16\n", "Mobil16_tau_4 = (tau_4 - MODEL_Mobil16) / SIGMA_STAR_Mobil16\n", "IndMobil16 = {\n", " 1: bioNormalCdf(Mobil16_tau_1),\n", " 2: bioNormalCdf(Mobil16_tau_2) - bioNormalCdf(Mobil16_tau_1),\n", " 3: bioNormalCdf(Mobil16_tau_3) - bioNormalCdf(Mobil16_tau_2),\n", " 4: bioNormalCdf(Mobil16_tau_4) - bioNormalCdf(Mobil16_tau_3),\n", " 5: 1 - bioNormalCdf(Mobil16_tau_4),\n", " 6: 1.0,\n", " -1: 1.0,\n", " -2: 1.0,\n", "}\n", "\n", "P_Mobil16 = Elem(IndMobil16, Mobil16)\n", "\n", "Mobil17_tau_1 = (tau_1 - MODEL_Mobil17) / SIGMA_STAR_Mobil17\n", "Mobil17_tau_2 = (tau_2 - MODEL_Mobil17) / SIGMA_STAR_Mobil17\n", "Mobil17_tau_3 = (tau_3 - MODEL_Mobil17) / SIGMA_STAR_Mobil17\n", "Mobil17_tau_4 = (tau_4 - MODEL_Mobil17) / SIGMA_STAR_Mobil17\n", "IndMobil17 = {\n", " 1: bioNormalCdf(Mobil17_tau_1),\n", " 2: bioNormalCdf(Mobil17_tau_2) - bioNormalCdf(Mobil17_tau_1),\n", " 3: bioNormalCdf(Mobil17_tau_3) - bioNormalCdf(Mobil17_tau_2),\n", " 4: bioNormalCdf(Mobil17_tau_4) - bioNormalCdf(Mobil17_tau_3),\n", " 5: 1 - bioNormalCdf(Mobil17_tau_4),\n", " 6: 1.0,\n", " -1: 1.0,\n", " -2: 1.0,\n", "}\n", "\n", "P_Mobil17 = Elem(IndMobil17, Mobil17)\n", "\n", "# Choice model\n", "\n", "ASC_CAR = Beta('ASC_CAR', betas['ASC_CAR'], None, None, 0)\n", "ASC_PT = Beta('ASC_PT', 0, None, None, 1)\n", "ASC_SM = Beta('ASC_SM', betas['ASC_SM'], None, None, 0)\n", "BETA_COST_HWH = Beta('BETA_COST_HWH', betas['BETA_COST_HWH'], None, None, 0)\n", "BETA_COST_OTHER = Beta(\n", " 'BETA_COST_OTHER', betas['BETA_COST_OTHER'], None, None, 0\n", ")\n", "BETA_DIST = Beta('BETA_DIST', betas['BETA_DIST'], None, None, 0)\n", "BETA_TIME_CAR_REF = Beta(\n", " 'BETA_TIME_CAR_REF', betas['BETA_TIME_CAR_REF'], None, 0, 0\n", ")\n", "BETA_TIME_CAR_CL = Beta(\n", " 'BETA_TIME_CAR_CL', betas['BETA_TIME_CAR_CL'], -10, 10, 0\n", ")\n", "BETA_TIME_PT_REF = Beta(\n", " 'BETA_TIME_PT_REF', betas['BETA_TIME_PT_REF'], None, 0, 0\n", ")\n", "BETA_TIME_PT_CL = Beta('BETA_TIME_PT_CL', betas['BETA_TIME_PT_CL'], -10, 10, 0)\n", "BETA_WAITING_TIME = Beta(\n", " 'BETA_WAITING_TIME', betas['BETA_WAITING_TIME'], None, None, 0\n", ")\n", "\n", "TimePT_scaled = DefineVariable('TimePT_scaled', TimePT / 200, database)\n", "TimeCar_scaled = DefineVariable('TimeCar_scaled', TimeCar / 200, database)\n", "MarginalCostPT_scaled = DefineVariable(\n", " 'MarginalCostPT_scaled', MarginalCostPT / 10, database\n", ")\n", "CostCarCHF_scaled = DefineVariable(\n", " 'CostCarCHF_scaled', CostCarCHF / 10, database\n", ")\n", "distance_km_scaled = DefineVariable(\n", " 'distance_km_scaled', distance_km / 5, database\n", ")\n", "PurpHWH = DefineVariable('PurpHWH', TripPurpose == 1, database)\n", "PurpOther = DefineVariable('PurpOther', TripPurpose != 1, database)\n", "\n", "### DEFINITION OF UTILITY FUNCTIONS:\n", "\n", "BETA_TIME_PT = BETA_TIME_PT_REF * exp(BETA_TIME_PT_CL * CARLOVERS)\n", "\n", "V0 = (\n", " ASC_PT\n", " + BETA_TIME_PT * TimePT_scaled\n", " + BETA_WAITING_TIME * WaitingTimePT\n", " + BETA_COST_HWH * MarginalCostPT_scaled * PurpHWH\n", " + BETA_COST_OTHER * MarginalCostPT_scaled * PurpOther\n", " + ec_sigma * errorComponent\n", ")\n", "\n", "BETA_TIME_CAR = BETA_TIME_CAR_REF * exp(BETA_TIME_CAR_CL * CARLOVERS)\n", "\n", "V1 = (\n", " ASC_CAR\n", " + BETA_TIME_CAR * TimeCar_scaled\n", " + BETA_COST_HWH * CostCarCHF_scaled * PurpHWH\n", " + BETA_COST_OTHER * CostCarCHF_scaled * PurpOther\n", " + ec_sigma * errorComponent\n", ")\n", "\n", "V2 = ASC_SM + BETA_DIST * distance_km_scaled\n", "\n", "# Associate utility functions with the numbering of alternatives\n", "V = {0: V0, 1: V1, 2: V2}\n", "\n", "# Conditional to the random parameters, we have a logit model (called\n", "# the kernel) for the choice\n", "condprob = models.logit(V, None, Choice)\n", "\n", "# Conditional to the random parameters, we have the product of ordered\n", "# probit for the indicators.\n", "condlike = (\n", " P_Envir01\n", " * P_Envir02\n", " * P_Envir03\n", " * P_Mobil11\n", " * P_Mobil14\n", " * P_Mobil16\n", " * P_Mobil17\n", " * condprob\n", ")\n", "\n", "# We integrate over omega using Monte-Carlo integration\n", "loglike = log(MonteCarlo(condlike))\n", "\n", "# Define level of verbosity\n", "logger = msg.bioMessage()\n", "# logger.setSilent()\n", "# logger.setWarning()\n", "logger.setGeneral()\n", "# logger.setDetailed()\n", "\n", "# Create the Biogeme object\n", "biogeme = bio.BIOGEME(database, loglike, numberOfDraws=20000)\n", "biogeme.modelName = '06serialCorrelation'\n", "\n", "# Estimate the parameters\n", "results = biogeme.estimate(algorithm=opt.bioBfgs)\n", "print(f'Estimated betas: {len(results.data.betaValues)}')\n", "print(f'Final log likelihood: {results.data.logLike:.3f}')\n", "print(f'Output file: {results.data.htmlFileName}')\n", "results.writeLaTeX()\n", "print(f'LaTeX file: {results.data.latexFileName}')\n" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 }