{ "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "#%%\n", "\"\"\"File 15panelDiscreteBis.py\n", "\n", ":author: Michel Bierlaire, EPFL\n", ":date: Sun Sep 8 19:30:31 2019\n", "\n", " Example of a discrete mixture of logit models, also called latent class model.\n", " The datafile is organized as panel data.\n", " Here, we integrate before the discrete mixture to show that it is equivalent.\n", " Three alternatives: Train, Car and Swissmetro\n", " SP data\n", "\"\"\"\n", "\n", "import pandas as pd\n", "import biogeme.database as db\n", "import biogeme.biogeme as bio\n", "import biogeme.models as models\n", "import biogeme.messaging as msg\n", "from biogeme.expressions import Beta, DefineVariable, bioDraws, \\\n", " PanelLikelihoodTrajectory, MonteCarlo, log\n", "\n", "# Read the data\n", "df = pd.read_csv('swissmetro.dat', '\\t')\n", "database = db.Database('swissmetro', df)\n", "\n", "# They are organized as panel data. The variable ID identifies each individual.\n", "database.panel(\"ID\")\n", "\n", "# The Pandas data structure is available as database.data. Use all the\n", "# Pandas functions to invesigate the database\n", "#print(database.data.describe())\n", "\n", "# The following statement allows you to use the names of the variable\n", "# as Python variable.\n", "globals().update(database.variables)\n", "\n", "# Removing some observations can be done directly using pandas.\n", "#remove = (((database.data.PURPOSE != 1) &\n", "# (database.data.PURPOSE != 3)) |\n", "# (database.data.CHOICE == 0))\n", "#database.data.drop(database.data[remove].index,inplace=True)\n", "\n", "# Here we use the \"biogeme\" way for backward compatibility\n", "exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0\n", "database.remove(exclude)\n", "\n", "# Parameters to be estimated. One version for each latent class.\n", "numberOfClasses = 2\n", "B_COST = [Beta(f'B_COST_class{i}', 0, None, None, 0) for i in range(numberOfClasses)]\n", "\n", "# Define a random parameter, normally distributed across individuals,\n", "# designed to be used for Monte-Carlo simulation\n", "B_TIME = [Beta(f'B_TIME_class{i}', 0, None, None, 0) for i in range(numberOfClasses)]\n", "\n", "# It is advised not to use 0 as starting value for the following parameter.\n", "B_TIME_S = [Beta(f'B_TIME_S_class{i}', 1, None, None, 0) for i in range(numberOfClasses)]\n", "B_TIME_RND = [B_TIME[i] + B_TIME_S[i] * bioDraws(f'B_TIME_RND_class{i}', 'NORMAL_ANTI')\n", " for i in range(numberOfClasses)]\n", "\n", "# We do the same for the constants, to address serial correlation.\n", "ASC_CAR = [Beta(f'ASC_CAR_class{i}', 0, None, None, 0) for i in range(numberOfClasses)]\n", "ASC_CAR_S = [Beta(f'ASC_CAR_S_class{i}', 1, None, None, 0) for i in range(numberOfClasses)]\n", "ASC_CAR_RND = [ASC_CAR[i] + ASC_CAR_S[i] * bioDraws(f'ASC_CAR_RND_class{i}', 'NORMAL_ANTI')\n", " for i in range(numberOfClasses)]\n", "\n", "ASC_TRAIN = [Beta(f'ASC_TRAIN_class{i}', 0, None, None, 0) for i in range(numberOfClasses)]\n", "ASC_TRAIN_S = [Beta(f'ASC_TRAIN_S_class{i}', 1, None, None, 0) for i in range(numberOfClasses)]\n", "ASC_TRAIN_RND = [ASC_TRAIN[i] + ASC_TRAIN_S[i] * bioDraws(f'ASC_TRAIN_RND_class{i}', 'NORMAL_ANTI')\n", " for i in range(numberOfClasses)]\n", "\n", "ASC_SM = [Beta(f'ASC_SM_class{i}', 0, None, None, 1) for i in range(numberOfClasses)]\n", "ASC_SM_S = [Beta(f'ASC_SM_S_class{i}', 1, None, None, 0) for i in range(numberOfClasses)]\n", "ASC_SM_RND = [ASC_SM[i] + ASC_SM_S[i] * bioDraws(f'ASC_SM_RND_class{i}', 'NORMAL_ANTI')\n", " for i in range(numberOfClasses)]\n", "\n", "# Class memebership probability\n", "PROB_class0 = Beta('PROB_class0', 0.5, 0, 1, 0)\n", "PROB_class1 = 1 - PROB_class0\n", "\n", "# Definition of new variables\n", "SM_COST = SM_CO * (GA == 0)\n", "TRAIN_COST = TRAIN_CO * (GA == 0)\n", "\n", "# Definition of new variables: adding columns to the database\n", "CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)\n", "TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)\n", "TRAIN_TT_SCALED = DefineVariable('TRAIN_TT_SCALED', TRAIN_TT / 100.0, database)\n", "TRAIN_COST_SCALED = DefineVariable('TRAIN_COST_SCALED', TRAIN_COST / 100, database)\n", "SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database)\n", "SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database)\n", "CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)\n", "CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)\n", "\n", "# In class 0, it is assumed that the time coefficient is zero\n", "B_TIME_RND[0] = 0\n", "\n", "# Utility functions\n", "V1 = [ASC_TRAIN_RND[i] + B_TIME_RND[i] * TRAIN_TT_SCALED + B_COST[i] * TRAIN_COST_SCALED\n", " for i in range(numberOfClasses)]\n", "V2 = [ASC_SM_RND[i] + B_TIME_RND[i] * SM_TT_SCALED + B_COST[i] * SM_COST_SCALED\n", " for i in range(numberOfClasses)]\n", "V3 = [ASC_CAR_RND[i] + B_TIME_RND[i] * CAR_TT_SCALED + B_COST[i] * CAR_CO_SCALED\n", " for i in range(numberOfClasses)]\n", "V = [{1: V1[i],\n", " 2: V2[i],\n", " 3: V3[i]} for i in range(numberOfClasses)]\n", "\n", "# Associate the availability conditions with the alternatives\n", "av = {1: TRAIN_AV_SP,\n", " 2: SM_AV,\n", " 3: CAR_AV_SP}\n", "\n", "# The choice model is a discrete mixture of logit, with availability conditions\n", "# We calculate the conditional probability for each class\n", "prob = [MonteCarlo(PanelLikelihoodTrajectory(models.logit(V[i], av, CHOICE)))\n", " for i in range(numberOfClasses)]\n", "\n", "# Conditional to the random variables, likelihood for the individual.\n", "probIndiv = PROB_class0 * prob[0] + PROB_class1 * prob[1]\n", "\n", "# We integrate over the random variables using Monte-Carlo\n", "logprob = log(probIndiv)\n", "\n", "# Define level of verbosity\n", "logger = msg.bioMessage()\n", "#logger.setSilent()\n", "#logger.setWarning()\n", "logger.setGeneral()\n", "#logger.setDetailed()\n", "\n", "# Create the Biogeme object\n", "biogeme = bio.BIOGEME(database, logprob, numberOfDraws=100000)\n", "biogeme.modelName = '15panelDiscreteBis'\n", "\n", "# As the estimation may take a while and risk to be interrupted, we save the iterations,\n", "# and restore them before the estimation.\n", "fname = \"__15panelDiscreteBis.iters\"\n", "biogeme.loadSavedIteration(filename=fname)\n", "# Estimate the parameters.\n", "results = biogeme.estimate(saveIterations=True, file_iterations=fname)\n", "pandasResults = results.getEstimatedParameters()\n", "print(pandasResults)\n" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 }