{ "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "#%%\n", "\"\"\"File 05normalMixture_simul.py\n", "\n", ":author: Michel Bierlaire, EPFL\n", ":date: Sat Sep 7 18:42:55 2019\n", "\n", " Example of a mixture of logit models, using Monte-Carlo integration, and\n", " used for simulatiom\n", " Three alternatives: Train, Car and Swissmetro\n", " SP data\n", "\"\"\"\n", "\n", "import sys\n", "import numpy as np\n", "import pandas as pd\n", "import biogeme.database as db\n", "import biogeme.biogeme as bio\n", "from biogeme import models\n", "import biogeme.results as res\n", "from biogeme.expressions import Beta, DefineVariable, bioDraws, MonteCarlo\n", "import matplotlib.pyplot as plt\n", "\n", "# Read the data\n", "df = pd.read_csv('swissmetro.dat', sep='\\t')\n", "database = db.Database('swissmetro', df)\n", "\n", "# The Pandas data structure is available as database.data. Use all the\n", "# Pandas functions to investigate the database. For example:\n", "# print(database.data.describe())\n", "\n", "# The following statement allows you to use the names of the variable\n", "# as Python variable.\n", "globals().update(database.variables)\n", "\n", "# Removing some observations can be done directly using pandas.\n", "# remove = (((database.data.PURPOSE != 1) &\n", "# (database.data.PURPOSE != 3)) |\n", "# (database.data.CHOICE == 0))\n", "# database.data.drop(database.data[remove].index,inplace=True)\n", "\n", "# Here we use the \"biogeme\" way for backward compatibility\n", "exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0\n", "database.remove(exclude)\n", "\n", "# Parameters to be estimated\n", "ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)\n", "ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)\n", "ASC_SM = Beta('ASC_SM', 0, None, None, 1)\n", "B_COST = Beta('B_COST', 0, None, None, 0)\n", "\n", "# Define a random parameter, normally distributed, designed to be used\n", "# for Monte-Carlo simulation\n", "B_TIME = Beta('B_TIME', 0, None, None, 0)\n", "\n", "# It is advised not to use 0 as starting value for the following parameter.\n", "B_TIME_S = Beta('B_TIME_S', 1, None, None, 0)\n", "B_TIME_RND = B_TIME + B_TIME_S * bioDraws('B_TIME_RND', 'NORMAL')\n", "\n", "# Definition of new variables\n", "SM_COST = SM_CO * (GA == 0)\n", "TRAIN_COST = TRAIN_CO * (GA == 0)\n", "\n", "# Definition of new variables: adding columns to the database\n", "CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)\n", "TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)\n", "TRAIN_TT_SCALED = DefineVariable('TRAIN_TT_SCALED', TRAIN_TT / 100.0, database)\n", "TRAIN_COST_SCALED = DefineVariable(\n", " 'TRAIN_COST_SCALED', TRAIN_COST / 100, database\n", ")\n", "SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database)\n", "SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database)\n", "CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)\n", "CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)\n", "\n", "# Definition of the utility functions\n", "V1 = ASC_TRAIN + B_TIME_RND * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED\n", "V2 = ASC_SM + B_TIME_RND * SM_TT_SCALED + B_COST * SM_COST_SCALED\n", "V3 = ASC_CAR + B_TIME_RND * CAR_TT_SCALED + B_COST * CAR_CO_SCALED\n", "\n", "# Associate utility functions with the numbering of alternatives\n", "V = {1: V1, 2: V2, 3: V3}\n", "\n", "# Associate the availability conditions with the alternatives\n", "av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}\n", "\n", "# The estimation results are read from the pickle file\n", "try:\n", " results = res.bioResults(pickleFile='05normalMixture.pickle')\n", "except FileNotFoundError:\n", " print(\n", " 'Run first the script 05normalMixture.py in order to generate the '\n", " 'file 05normalMixture.pickle.'\n", " )\n", " sys.exit()\n", "\n", "# Conditional to B_TIME_RND, we have a logit model (called the kernel)\n", "prob = models.logit(V, av, CHOICE)\n", "\n", "# We calculate the integration error. Note that this formula assumes\n", "# independent draws, and is not valid for Haltom or antithetic draws.\n", "numberOfDraws = 100000\n", "integral = MonteCarlo(prob)\n", "integralSquare = MonteCarlo(prob * prob)\n", "variance = integralSquare - integral * integral\n", "error = (variance / 2.0) ** 0.5\n", "\n", "# And the value of the individual parameters\n", "numerator = MonteCarlo(B_TIME_RND * prob)\n", "denominator = integral\n", "\n", "simulate = {\n", " 'Numerator': numerator,\n", " 'Denominator': denominator,\n", " 'Integral': integral,\n", " 'Integration error': error,\n", "}\n", "\n", "# Create the Biogeme object\n", "biosim = bio.BIOGEME(database, simulate, numberOfDraws=numberOfDraws)\n", "biosim.modelName = \"05normalMixture_simul\"\n", "\n", "# Simulate the requested quantities. The output is a Pandas data frame\n", "simresults = biosim.simulate(results.getBetaValues())\n", "\n", "# 95% confidence interval on the log likelihood.\n", "simresults['left'] = np.log(\n", " simresults['Integral'] - 1.96 * simresults['Integration error']\n", ")\n", "simresults['right'] = np.log(\n", " simresults['Integral'] + 1.96 * simresults['Integration error']\n", ")\n", "\n", "print(f'Log likelihood: {np.log(simresults[\"Integral\"]).sum()}')\n", "print(\n", " f'Integration error for {numberOfDraws} draws: '\n", " f'{simresults[\"Integration error\"].sum()}'\n", ")\n", "print(f'In average {simresults[\"Integration error\"].mean()} per observation.')\n", "print(\n", " f'95% confidence interval: [{simresults[\"left\"].sum()}-'\n", " f'{simresults[\"right\"].sum()}]'\n", ")\n", "\n", "# Post processing to obtain the individual parameters\n", "simresults['beta'] = simresults['Numerator'] / simresults['Denominator']\n", "\n", "# Plot the histogram of individual parameters\n", "simresults['beta'].plot(kind='hist', density=True, bins=20)\n", "\n", "# Plot the general distribution of beta\n", "def normalpdf(v, mu=0.0, s=1.0):\n", " \"\"\"\n", " Calculate the pdf of the normal distribution, for plotting purposes.\n", "\n", " \"\"\"\n", " d = -(v - mu) * (v - mu)\n", " n = 2.0 * s * s\n", " a = d / n\n", " num = np.exp(a)\n", " den = s * 2.506628275\n", " p = num / den\n", " return p\n", "\n", "\n", "betas = results.getBetaValues(['B_TIME', 'B_TIME_S'])\n", "x = np.arange(simresults['beta'].min(), simresults['beta'].max(), 0.01)\n", "plt.plot(x, normalpdf(x, betas['B_TIME'], betas['B_TIME_S']), '-')\n", "plt.show()\n" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 }