{
  "cells": [
    {
      "cell_type": "code",
      "metadata": {},
      "source": [
        "#%%\n",
        "\"\"\"File 26triangularPanelMixture.py\n",
        "\n",
        ":author: Michel Bierlaire, EPFL\n",
        ":date: Mon Sep  9 10:27:40 2019\n",
        "\n",
        " Example of a mixture of logit models, using Monte-Carlo integration.\n",
        " THe micing distribution is user-defined (triangular, here).\n",
        " The datafile is organized as panel data.\n",
        " Three alternatives: Train, Car and Swissmetro\n",
        " SP data\n",
        "\"\"\"\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import biogeme.database as db\n",
        "import biogeme.biogeme as bio\n",
        "import biogeme.models as models\n",
        "import biogeme.messaging as msg\n",
        "from biogeme.expressions import Beta, DefineVariable, bioDraws, MonteCarlo, \\\n",
        "    PanelLikelihoodTrajectory, log\n",
        "\n",
        "# Read the data\n",
        "df = pd.read_csv('swissmetro.dat', '\\t')\n",
        "database = db.Database('swissmetro', df)\n",
        "\n",
        "# They are organized as panel data. The variable ID identifies each individual.\n",
        "database.panel(\"ID\")\n",
        "\n",
        "# The Pandas data structure is available as database.data. Use all the\n",
        "# Pandas functions to invesigate the database\n",
        "#print(database.data.describe())\n",
        "\n",
        "# The following statement allows you to use the names of the variable\n",
        "# as Python variable.\n",
        "globals().update(database.variables)\n",
        "\n",
        "# Removing some observations can be done directly using pandas.\n",
        "#remove = (((database.data.PURPOSE != 1) &\n",
        "#           (database.data.PURPOSE != 3)) |\n",
        "#          (database.data.CHOICE == 0))\n",
        "#database.data.drop(database.data[remove].index,inplace=True)\n",
        "\n",
        "# Here we use the \"biogeme\" way for backward compatibility\n",
        "exclude = ((PURPOSE != 1) * (PURPOSE != 3) + (CHOICE == 0)) > 0\n",
        "database.remove(exclude)\n",
        "\n",
        "def theTriangularGenerator(sampleSize, numberOfDraws):\n",
        "    \"\"\"\n",
        "    Provide my own random number generator to the database.\n",
        "    See the numpy.random documentation to obtain a list of other distributions.\n",
        "    \"\"\"\n",
        "    return np.random.triangular(-1, 0, 1, (sampleSize, numberOfDraws))\n",
        "\n",
        "myRandomNumberGenerators = {'TRIANGULAR':(theTriangularGenerator,\n",
        "                                          'Draws from a triangular distribution')}\n",
        "database.setRandomNumberGenerators(myRandomNumberGenerators)\n",
        "\n",
        "# Parameters to be estimated\n",
        "B_COST = Beta('B_COST', 0, None, None, 0)\n",
        "\n",
        "# Define a random parameter, normally distributed across individuals,\n",
        "# designed to be used for Monte-Carlo simulation\n",
        "B_TIME = Beta('B_TIME', 0, None, None, 0)\n",
        "\n",
        "# It is advised not to use 0 as starting value for the following parameter.\n",
        "B_TIME_S = Beta('B_TIME_S', 1, None, None, 0)\n",
        "B_TIME_RND = B_TIME + B_TIME_S * bioDraws('B_TIME_RND', 'TRIANGULAR')\n",
        "\n",
        "# We do the same for the constants, to address serial correlation.\n",
        "ASC_CAR = Beta('ASC_CAR', 0, None, None, 0)\n",
        "ASC_CAR_S = Beta('ASC_CAR_S', 1, None, None, 0)\n",
        "ASC_CAR_RND = ASC_CAR + ASC_CAR_S * bioDraws('ASC_CAR_RND', 'TRIANGULAR')\n",
        "\n",
        "ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)\n",
        "ASC_TRAIN_S = Beta('ASC_TRAIN_S', 1, None, None, 0)\n",
        "ASC_TRAIN_RND = ASC_TRAIN + ASC_TRAIN_S * bioDraws('ASC_TRAIN_RND', 'TRIANGULAR')\n",
        "\n",
        "ASC_SM = Beta('ASC_SM', 0, None, None, 1)\n",
        "ASC_SM_S = Beta('ASC_SM_S', 1, None, None, 0)\n",
        "ASC_SM_RND = ASC_SM + ASC_SM_S * bioDraws('ASC_SM_RND', 'TRIANGULAR')\n",
        "\n",
        "# Definition of new variables\n",
        "SM_COST = SM_CO * (GA == 0)\n",
        "TRAIN_COST = TRAIN_CO * (GA == 0)\n",
        "\n",
        "# Definition of new variables: adding columns to the database\n",
        "CAR_AV_SP = DefineVariable('CAR_AV_SP', CAR_AV * (SP != 0), database)\n",
        "TRAIN_AV_SP = DefineVariable('TRAIN_AV_SP', TRAIN_AV * (SP != 0), database)\n",
        "TRAIN_TT_SCALED = DefineVariable('TRAIN_TT_SCALED', TRAIN_TT / 100.0, database)\n",
        "TRAIN_COST_SCALED = DefineVariable('TRAIN_COST_SCALED', TRAIN_COST / 100, database)\n",
        "SM_TT_SCALED = DefineVariable('SM_TT_SCALED', SM_TT / 100.0, database)\n",
        "SM_COST_SCALED = DefineVariable('SM_COST_SCALED', SM_COST / 100, database)\n",
        "CAR_TT_SCALED = DefineVariable('CAR_TT_SCALED', CAR_TT / 100, database)\n",
        "CAR_CO_SCALED = DefineVariable('CAR_CO_SCALED', CAR_CO / 100, database)\n",
        "\n",
        "# Definition of the utility functions\n",
        "V1 = ASC_TRAIN_RND + \\\n",
        "     B_TIME_RND * TRAIN_TT_SCALED + \\\n",
        "     B_COST * TRAIN_COST_SCALED\n",
        "V2 = ASC_SM_RND + \\\n",
        "     B_TIME_RND * SM_TT_SCALED + \\\n",
        "     B_COST * SM_COST_SCALED\n",
        "V3 = ASC_CAR_RND + \\\n",
        "     B_TIME_RND * CAR_TT_SCALED + \\\n",
        "     B_COST * CAR_CO_SCALED\n",
        "\n",
        "# Associate utility functions with the numbering of alternatives\n",
        "V = {1: V1,\n",
        "     2: V2,\n",
        "     3: V3}\n",
        "\n",
        "# Associate the availability conditions with the alternatives\n",
        "av = {1: TRAIN_AV_SP,\n",
        "      2: SM_AV,\n",
        "      3: CAR_AV_SP}\n",
        "\n",
        "# Conditional to the random parameters, the likelihood of one observation is\n",
        "# given by the logit model (called the kernel)\n",
        "obsprob = models.logit(V, av, CHOICE)\n",
        "\n",
        "# Conditional to the random parameters, the likelihood of all observations for\n",
        "# one individual (the trajectory) is the product of the likelihood of\n",
        "# each observation.\n",
        "condprobIndiv = PanelLikelihoodTrajectory(obsprob)\n",
        "\n",
        "# We integrate over the random parameters using Monte-Carlo\n",
        "logprob = log(MonteCarlo(condprobIndiv))\n",
        "\n",
        "# Define level of verbosity\n",
        "logger = msg.bioMessage()\n",
        "#logger.setSilent()\n",
        "#logger.setWarning()\n",
        "#logger.setGeneral()\n",
        "logger.setDetailed()\n",
        "#logger.setDebug()\n",
        "\n",
        "# Create the Biogeme object\n",
        "biogeme = bio.BIOGEME(database, logprob, numberOfDraws=100000)\n",
        "biogeme.modelName = '26triangularPanelMixture'\n",
        "\n",
        "# Estimate the parameters.\n",
        "results = biogeme.estimate()\n",
        "pandasResults = results.getEstimatedParameters()\n",
        "print(pandasResults)\n"
      ],
      "outputs": [],
      "execution_count": null
    }
  ],
  "metadata": {
    "anaconda-cloud": {},
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.1"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 1
}