{ "cells": [ { "cell_type": "code", "metadata": {}, "source": [ "#%%\n", "\"\"\"File 05nestedElasticitiesCI_bootstrap.py\n", "\n", ":author: Michel Bierlaire, EPFL\n", ":date: Wed Sep 11 15:57:46 2019\n", "\n", " We use a previously estimated nested logit model.\n", " Three alternatives: public transporation, car and slow modes.\n", " RP data.\n", "\n", " We calculate disaggregate and aggregate direct arc elasticities, and\n", " the confidence intervals. The difference with\n", " 05nestedElasticitiesConfidenceIntervals is that the simulated betas\n", " used to calculated the confidence intervals are drawn from a normal\n", " distribution based on the bootstrap estimate of the variance\n", " covariance matrix.\n", "\"\"\"\n", "\n", "import sys\n", "import numpy as np\n", "import pandas as pd\n", "import biogeme.database as db\n", "import biogeme.biogeme as bio\n", "import biogeme.models as models\n", "import biogeme.results as res\n", "from biogeme.expressions import Beta\n", "\n", "# Calculate confidence intervals for elasticities requires interval arithmetics\n", "# Use 'pip install pyinterval' if not available on your system.\n", "# Warning: other types of interval packages are also available.\n", "try:\n", " import interval as ia\n", "except ModuleNotFoundError:\n", " print('Use \"pip install pyinterval\" to install a requested package')\n", " sys.exit()\n", "\n", "# Read the data\n", "df = pd.read_csv('optima.dat', sep='\\t')\n", "database = db.Database('optima', df)\n", "\n", "# The following statement allows you to use the names of the variable\n", "# as Python variable.\n", "globals().update(database.variables)\n", "\n", "# Exclude observations such that the chosen alternative is -1\n", "database.remove(Choice == -1.0)\n", "\n", "# Normalize the weights\n", "sumWeight = database.data['Weight'].sum()\n", "numberOfRows = database.data.shape[0]\n", "normalizedWeight = Weight * numberOfRows / sumWeight\n", "\n", "# Calculate the number of accurences of a value in the database\n", "numberOfMales = database.count('Gender', 1)\n", "print(f'Number of males: {numberOfMales}')\n", "\n", "numberOfFemales = database.count('Gender', 2)\n", "print(f'Number of females: {numberOfFemales}')\n", "\n", "# For more complex conditions, using directly Pandas\n", "unreportedGender = database.data[\n", " (database.data['Gender'] != 1) & (database.data['Gender'] != 2)\n", "].count()['Gender']\n", "print(f'Unreported gender: {unreportedGender}')\n", "\n", "# List of parameters. Their value will be set later.\n", "ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)\n", "ASC_PT = Beta('ASC_PT', 0, None, None, 1)\n", "ASC_SM = Beta('ASC_SM', 0, None, None, 0)\n", "BETA_TIME_FULLTIME = Beta('BETA_TIME_FULLTIME', 0, None, None, 0)\n", "BETA_TIME_OTHER = Beta('BETA_TIME_OTHER', 0, None, None, 0)\n", "BETA_DIST_MALE = Beta('BETA_DIST_MALE', 0, None, None, 0)\n", "BETA_DIST_FEMALE = Beta('BETA_DIST_FEMALE', 0, None, None, 0)\n", "BETA_DIST_UNREPORTED = Beta('BETA_DIST_UNREPORTED', 0, None, None, 0)\n", "BETA_COST = Beta('BETA_COST', 0, None, None, 0)\n", "\n", "# Define new variables. Must be consistent with estimation results.\n", "TimePT_scaled = TimePT / 200\n", "TimeCar_scaled = TimeCar / 200\n", "MarginalCostPT_scaled = MarginalCostPT / 10\n", "CostCarCHF_scaled = CostCarCHF / 10\n", "distance_km_scaled = distance_km / 5\n", "male = Gender == 1\n", "female = Gender == 2\n", "unreportedGender = Gender == -1\n", "fulltime = OccupStat == 1\n", "notfulltime = OccupStat != 1\n", "\n", "# Definition of utility functions:\n", "V_PT = (\n", " ASC_PT\n", " + BETA_TIME_FULLTIME * TimePT_scaled * fulltime\n", " + BETA_TIME_OTHER * TimePT_scaled * notfulltime\n", " + BETA_COST * MarginalCostPT_scaled\n", ")\n", "V_CAR = (\n", " ASC_CAR\n", " + BETA_TIME_FULLTIME * TimeCar_scaled * fulltime\n", " + BETA_TIME_OTHER * TimeCar_scaled * notfulltime\n", " + BETA_COST * CostCarCHF_scaled\n", ")\n", "V_SM = (\n", " ASC_SM\n", " + BETA_DIST_MALE * distance_km_scaled * male\n", " + BETA_DIST_FEMALE * distance_km_scaled * female\n", " + BETA_DIST_UNREPORTED * distance_km_scaled * unreportedGender\n", ")\n", "\n", "# Associate utility functions with the numbering of alternatives\n", "V = {0: V_PT, 1: V_CAR, 2: V_SM}\n", "\n", "# Definition of the nests:\n", "# 1: nests parameter\n", "# 2: list of alternatives\n", "\n", "MU_NOCAR = Beta('MU_NOCAR', 1.0, 1.0, None, 0)\n", "CAR_NEST = 1.0, [1]\n", "NO_CAR_NEST = MU_NOCAR, [0, 2]\n", "nests = CAR_NEST, NO_CAR_NEST\n", "\n", "# The choice model is a nested logit\n", "prob_pt = models.nested(V, None, nests, 0)\n", "prob_car = models.nested(V, None, nests, 1)\n", "prob_sm = models.nested(V, None, nests, 2)\n", "\n", "# We investigate a scenario where the distance increases by one kilometer.\n", "delta_dist = 1.0\n", "distance_km_scaled_after = (distance_km + delta_dist) / 5\n", "\n", "# Utility of the slow mode whem the distance increases by 1 kilometer.\n", "V_SM_after = (\n", " ASC_SM\n", " + BETA_DIST_MALE * distance_km_scaled_after * male\n", " + BETA_DIST_FEMALE * distance_km_scaled_after * female\n", " + BETA_DIST_UNREPORTED * distance_km_scaled_after * unreportedGender\n", ")\n", "\n", "# Associate utility functions with the numbering of alternatives\n", "V_after = {0: V_PT, 1: V_CAR, 2: V_SM_after}\n", "\n", "# Definition of the nests:\n", "# 1: nests parameter\n", "# 2: list of alternatives\n", "\n", "prob_sm_after = models.nested(V_after, None, nests, 2)\n", "\n", "direct_elas_sm_dist = (\n", " (prob_sm_after - prob_sm) * distance_km / (prob_sm * delta_dist)\n", ")\n", "\n", "simulate = {\n", " 'weight': normalizedWeight,\n", " 'Prob. slow modes': prob_sm,\n", " 'direct_elas_sm_dist': direct_elas_sm_dist,\n", "}\n", "\n", "biogeme = bio.BIOGEME(database, simulate)\n", "biogeme.modelName = '05nestedElasticitiesCI_Bootstrap'\n", "\n", "# Read the estimation results from the file\n", "results = res.bioResults(pickleFile='01nestedEstimation.pickle')\n", "\n", "# simulatedValues is a Panda dataframe with the same number of rows as\n", "# the database, and as many columns as formulas to simulate.\n", "simulatedValues = biogeme.simulate(results.getBetaValues())\n", "\n", "# We calculate the elasticities\n", "simulatedValues['Weighted prob. slow modes'] = (\n", " simulatedValues['weight'] * simulatedValues['Prob. slow modes']\n", ")\n", "\n", "denominator_sm = simulatedValues['Weighted prob. slow modes'].sum()\n", "\n", "direct_elas_sm_dist = (\n", " simulatedValues['Weighted prob. slow modes']\n", " * simulatedValues['direct_elas_sm_dist']\n", " / denominator_sm\n", ").sum()\n", "print(\n", " f'Aggregate direct elasticity of slow modes wrt distance: '\n", " f'{direct_elas_sm_dist:.7f}'\n", ")\n", "\n", "\n", "print('Calculating confidence interval...')\n", "\n", "# Calculate confidence intervals\n", "# Draw values of betas from the distribution of the estimator. The\n", "# bootstrap estimate of the variance-covariance matrix is used.\n", "\n", "size = 100\n", "\n", "# All betas are simulated\n", "simulatedBetas = np.random.multivariate_normal(\n", " results.data.betaValues, results.data.bootstrap_varCovar, size\n", ")\n", "\n", "# Only those necessary for the simulation are selected.\n", "# The values are also associated with their name\n", "index = [results.data.betaNames.index(b) for b in biogeme.freeBetaNames]\n", "b = [\n", " {biogeme.freeBetaNames[i]: value for i, value in enumerate(row)}\n", " for row in simulatedBetas[:, index]\n", "]\n", "\n", "# Returns data frame containing, for each simulated value, the left\n", "# and right bounds of the confidence interval calculated by\n", "# simulation.\n", "left, right = biogeme.confidenceIntervals(b, 0.9)\n", "\n", "left['Weighted prob. slow modes'] = left['weight'] * left['Prob. slow modes']\n", "right['Weighted prob. slow modes'] = (\n", " right['weight'] * right['Prob. slow modes']\n", ")\n", "denominator_left = left['Weighted prob. slow modes'].sum()\n", "denominator_right = right['Weighted prob. slow modes'].sum()\n", "\n", "# Build an interval object for the denominator\n", "denominator_interval = ia.interval[(denominator_left, denominator_right)]\n", "\n", "# Build a list of interval objects, one for each disaggregate elasticity\n", "elas_interval = [\n", " ia.interval([l, r])\n", " for l, r in zip(left['direct_elas_sm_dist'], right['direct_elas_sm_dist'])\n", "]\n", "\n", "# Build a list of interval objects, one for each term of the numerator\n", "numerator_interval = [\n", " ia.interval([l, r])\n", " for l, r in zip(\n", " left['Weighted prob. slow modes'], right['Weighted prob. slow modes']\n", " )\n", "]\n", "\n", "# Build a list of interval objects, one for each term of the sum\n", "terms_of_the_sum_interval = [\n", " e * wp / denominator_interval\n", " for e, wp in zip(elas_interval, numerator_interval)\n", "]\n", "\n", "# The interval package apparently does not provide a tool to sum a\n", "# list of intervals. We do it manually. Note that the object interval\n", "# contains a list of ranges (to allow the modeling of disconnected\n", "# intervals). The first of these ranges is x[0], and we access the inf\n", "# and sup values of this range.\n", "sum_interval_left = sum(x[0].inf for x in terms_of_the_sum_interval)\n", "sum_interval_right = sum(x[0].sup for x in terms_of_the_sum_interval)\n", "\n", "print(f'[{sum_interval_left}, {sum_interval_right}]')\n" ], "outputs": [], "execution_count": null } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 4 }