{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 01logitBis: example of a logit model\n",
    "\n",
    "Same as 01logit, using `bioLinearUtility`, and introducing some options\n",
    "and features. Three alternatives: Train, Car and Swissmetro. SP data.\n",
    "\n",
    "- Original file: `01logitBis.py`\n",
    "- Author: Michel Bierlaire, EPFL\n",
    "- Date: Thu Sep 6 15:14:39 2018"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import biogeme.biogeme as bio\n",
    "import biogeme.database as db\n",
    "from biogeme import models\n",
    "import biogeme.optimization as opt\n",
    "import biogeme.messaging as msg\n",
    "from biogeme.expressions import Beta, DefineVariable, bioLinearUtility"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: values a reader may want to tune.\n",
    "\n",
    "# Seed applied to NumPy's global random generator before estimation so\n",
    "# that the bootstrap statistics can be reproduced on a fresh kernel.\n",
    "RANDOM_SEED = 42\n",
    "\n",
    "# Number of bootstrap re-estimations requested from biogeme.estimate().\n",
    "BOOTSTRAP_DRAWS = 100"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and filter the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the data\n",
    "df = pd.read_csv('swissmetro.dat', sep='\\t')\n",
    "database = db.Database('swissmetro', df)\n",
    "\n",
    "# The Pandas data structure is available as database.data. Use all the\n",
    "# Pandas functions to investigate the database. For example:\n",
    "# print(database.data.describe())\n",
    "\n",
    "# The following statement allows you to use the names of the variables\n",
    "# as Python variables.\n",
    "globals().update(database.variables)\n",
    "\n",
    "# Removing some observations can be done directly using pandas.\n",
    "# remove = (((database.data.PURPOSE != 1) &\n",
    "#            (database.data.PURPOSE != 3)) |\n",
    "#           (database.data.CHOICE == 0))\n",
    "# database.data.drop(database.data[remove].index,inplace=True)\n",
    "\n",
    "# Here we use the \"biogeme\" way for backward compatibility.\n",
    "# The data is re-read at the top of this cell, so re-running the cell\n",
    "# always applies the exclusion to a fresh copy.\n",
    "exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0\n",
    "database.remove(exclude)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parameters and variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parameters to be estimated\n",
    "# NOTE(review): the last argument of Beta is presumably the \"fixed\" flag,\n",
    "# which would make ASC_SM the normalized constant - confirm against the\n",
    "# Beta documentation.\n",
    "ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)\n",
    "ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)\n",
    "ASC_SM = Beta('ASC_SM', 0, None, None, 1)\n",
    "\n",
    "# We use starting values estimated from a previous run\n",
    "B_TIME = Beta('B_TIME', -1.28, None, None, 0)\n",
    "B_COST = Beta('B_COST', -1.08, None, None, 0)\n",
    "\n",
    "# Definition of new variables\n",
    "SM_COST = SM_CO * (GA == 0)\n",
    "TRAIN_COST = TRAIN_CO * (GA == 0)\n",
    "\n",
    "# Definition of new variables by adding columns to the database.\n",
    "# This is recommended for estimation, but not for simulation.\n",
    "CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)\n",
    "TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)\n",
    "TRAIN_TT_SCALED = DefineVariable('TRAIN_TT_SCALED', TRAIN_TT / 100.0, database)\n",
    "TRAIN_COST_SCALED = DefineVariable(\n",
    "    'TRAIN_COST_SCALED', TRAIN_COST / 100, database\n",
    ")\n",
    "SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database)\n",
    "SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database)\n",
    "CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)\n",
    "CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model specification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Definition of the utility functions. Each utility is an alternative\n",
    "# specific constant plus a bioLinearUtility built from a list of\n",
    "# (parameter, variable) pairs.\n",
    "terms1 = [(B_TIME, TRAIN_TT_SCALED), (B_COST, TRAIN_COST_SCALED)]\n",
    "V1 = ASC_TRAIN + bioLinearUtility(terms1)\n",
    "\n",
    "terms2 = [(B_TIME, SM_TT_SCALED), (B_COST, SM_COST_SCALED)]\n",
    "V2 = ASC_SM + bioLinearUtility(terms2)\n",
    "\n",
    "terms3 = [(B_TIME, CAR_TT_SCALED), (B_COST, CAR_CO_SCALED)]\n",
    "V3 = ASC_CAR + bioLinearUtility(terms3)\n",
    "\n",
    "# Associate utility functions with the numbering of alternatives\n",
    "V = {1: V1, 2: V2, 3: V3}\n",
    "\n",
    "# Associate the availability conditions with the alternatives\n",
    "av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}\n",
    "\n",
    "# Definition of the model. This is the contribution of each\n",
    "# observation to the log likelihood function.\n",
    "logprob = models.loglogit(V, av, CHOICE)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Estimation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define level of verbosity\n",
    "logger = msg.bioMessage()\n",
    "# logger.setSilent()\n",
    "# logger.setWarning()\n",
    "logger.setGeneral()\n",
    "# logger.setDetailed()\n",
    "\n",
    "# These notes will be included as such in the report file.\n",
    "userNotes = (\n",
    "    'Example of a logit model with three alternatives: Train, Car and'\n",
    "    ' Swissmetro. Same as 01logit, using bioLinearUtility, and '\n",
    "    'introducing some options and features.'\n",
    ")\n",
    "\n",
    "# Create the Biogeme object\n",
    "biogeme = bio.BIOGEME(\n",
    "    database, logprob, numberOfThreads=2, userNotes=userNotes\n",
    ")\n",
    "\n",
    "# As we have used starting values different from 0, the initial model\n",
    "# is not the equal probability model. If we want to include the latter\n",
    "# in the results, we need to calculate its log likelihood.\n",
    "biogeme.calculateNullLoglikelihood(av)\n",
    "\n",
    "biogeme.modelName = '01logitBis'\n",
    "biogeme.saveIterations = False\n",
    "\n",
    "# Seed NumPy's global generator immediately before estimating so that\n",
    "# re-running this cell repeats the same bootstrap draws.\n",
    "# NOTE(review): this assumes Biogeme's bootstrap resampling draws from\n",
    "# numpy.random - confirm for the installed Biogeme version.\n",
    "np.random.seed(RANDOM_SEED)\n",
    "\n",
    "# Estimate the parameters\n",
    "results = biogeme.estimate(\n",
    "    bootstrap=BOOTSTRAP_DRAWS,\n",
    "    algorithm=opt.bioNewton,\n",
    "    algoParameters={'maxiter': 1000},\n",
    ")\n",
    "\n",
    "biogeme.createLogFile(verbosity=3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the results in a pandas table\n",
    "print('Parameters')\n",
    "print('----------')\n",
    "pandasResults = results.getEstimatedParameters()\n",
    "print(pandasResults)\n",
    "\n",
    "# Get general statistics. Each entry maps a description to a\n",
    "# (value, format specification) pair.\n",
    "print('General statistics')\n",
    "print('------------------')\n",
    "stats = results.getGeneralStatistics()\n",
    "for description, (value, formatting) in stats.items():\n",
    "    print(f'{description}: {value:{formatting}}')\n",
    "\n",
    "# Messages from the optimization algorithm\n",
    "print('Optimization algorithm')\n",
    "print('----------------------')\n",
    "for description, message in results.data.optimizationMessages.items():\n",
    "    print(f'{description}:\\t{message}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate the file in Alogit format, once with robust standard errors\n",
    "# and once without.\n",
    "results.writeF12(robustStdErr=True)\n",
    "results.writeF12(robustStdErr=False)"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}