{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Binary probit model: Train vs. Car (Swissmetro SP data)\n",
    "\n",
    "Example of a binary probit model estimated with Biogeme on the Swissmetro stated-preference (SP) data. Only two alternatives are modelled: Train (alternative 1) and Car (alternative 3).\n",
    "\n",
    "- Author: Michel Bierlaire, EPFL\n",
    "- Date: Mon Sep 9 10:14:57 2019\n",
    "- Original script: `21probit.py`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import biogeme.database as db\n",
    "import biogeme.biogeme as bio\n",
    "import biogeme.messaging as msg\n",
    "from biogeme.expressions import Beta, DefineVariable, bioNormalCdf, Elem, log"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and prepare the data\n",
    "\n",
    "The data are read from the tab-separated file `swissmetro.dat`, expected in the working directory. The underlying pandas data structure is available as `database.data`, so all pandas functions can be used to investigate it, e.g. `database.data.describe()`.\n",
    "\n",
    "`globals().update(database.variables)` makes the names of the variables (the columns of the file) available as Python variables.\n",
    "\n",
    "As we estimate a binary model, the filter below removes the observations where\n",
    "\n",
    "- Swissmetro was chosen (`CHOICE == 2`),\n",
    "- one of the two alternatives is not available (the availability is set to zero when `SP == 0`),\n",
    "- `PURPOSE` is neither 1 nor 3, or `CHOICE == 0`.\n",
    "\n",
    "The exclusion condition is written as a Biogeme expression for backward compatibility. Removing some observations can also be done directly using pandas, for instance:\n",
    "\n",
    "```python\n",
    "remove = (((database.data.PURPOSE != 1) &\n",
    "           (database.data.PURPOSE != 3)) |\n",
    "          (database.data.CHOICE == 0))\n",
    "database.data.drop(database.data[remove].index, inplace=True)\n",
    "```\n",
    "\n",
    "New variables are defined with `DefineVariable`, which adds columns to the database. This is recommended for estimation, but not for simulation.\n",
    "\n",
    "Every statement that modifies `database` is kept in the next cell, together with the statement that creates it, so that re-running the cell always starts again from the raw file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the data\n",
    "df = pd.read_csv('swissmetro.dat', sep='\\t')\n",
    "database = db.Database('swissmetro', df)\n",
    "\n",
    "# Use the names of the variables as Python variables.\n",
    "globals().update(database.variables)\n",
    "\n",
    "# Availability of each alternative, set to zero when SP == 0\n",
    "CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)\n",
    "TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)\n",
    "\n",
    "# An observation is removed as soon as one of the conditions holds\n",
    "exclude = (TRAIN_AV_SP == 0) + (CAR_AV_SP == 0) + (CHOICE == 2) + (\n",
    "    (PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)\n",
    ") > 0\n",
    "database.remove(exclude)\n",
    "\n",
    "# The train cost is counted only when GA == 0\n",
    "TRAIN_COST = TRAIN_CO * (GA == 0)\n",
    "\n",
    "# Scaled variables, added as new columns of the database\n",
    "TRAIN_TT_SCALED = DefineVariable('TRAIN_TT_SCALED', TRAIN_TT / 100.0, database)\n",
    "TRAIN_COST_SCALED = DefineVariable(\n",
    "    'TRAIN_COST_SCALED', TRAIN_COST / 100, database\n",
    ")\n",
    "CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)\n",
    "CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model specification\n",
    "\n",
    "Parameters to be estimated: an alternative specific constant for Car and the coefficients of (scaled) travel time and cost, all starting at 0."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)\n",
    "B_TIME = Beta('B_TIME', 0, None, None, 0)\n",
    "B_COST = Beta('B_COST', 0, None, None, 0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We estimate a binary probit model, so there are only two utility functions. The probability of each alternative is computed with `bioNormalCdf` applied to the difference of utilities: $P(1) = \\Phi(V_1 - V_3)$ and $P(3) = \\Phi(V_3 - V_1)$. `logprob` is the contribution of each observation to the log likelihood function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Definition of the utility functions\n",
    "V1 = B_TIME * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED\n",
    "V3 = ASC_CAR + B_TIME * CAR_TT_SCALED + B_COST * CAR_CO_SCALED\n",
    "\n",
    "# Associate choice probability with the numbering of alternatives\n",
    "P = {1: bioNormalCdf(V1 - V3), 3: bioNormalCdf(V3 - V1)}\n",
    "\n",
    "# Contribution of each observation to the log likelihood function\n",
    "logprob = log(Elem(P, CHOICE))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Estimation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Level of verbosity. Other levels offered by the logger:\n",
    "# setWarning(), setGeneral(), setDetailed()\n",
    "logger = msg.bioMessage()\n",
    "logger.setSilent()\n",
    "\n",
    "# Create the Biogeme object\n",
    "biogeme = bio.BIOGEME(database, logprob)\n",
    "biogeme.modelName = '21probit'\n",
    "\n",
    "# Estimate the parameters\n",
    "results = biogeme.estimate()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results\n",
    "\n",
    "Estimated parameters, displayed as a table."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pandasResults = results.getEstimatedParameters()\n",
    "pandasResults"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}