{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Seeds: Random effect logistic regression\n",
      "\n",
      "Ported to PyMC3 by Abraham Flaxman, 2/16/2014 from [PyMC2 version](http://nbviewer.ipython.org/github/aflaxman/pymc-examples/blob/master/seeds_re_logistic_regression.ipynb), based on http://www.openbugs.info/Examples/Seeds.html\n",
      "\n"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np, pymc as pm, theano.tensor as T"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Warning: statsmodels and/or patsy not found, not importing glm submodule.\n"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "### data - same as PyMC2 version\n",
      "# germinated seeds\n",
      "r =  np.array([10, 23, 23, 26, 17, 5, 53, 55, 32, 46, 10, 8, 10, 8, 23, 0, 3, 22, 15, 32, 3])\n",
      "\n",
      "# total seeds\n",
      "n =  np.array([39, 62, 81, 51, 39, 6, 74, 72, 51, 79, 13, 16, 30, 28, 45, 4, 12, 41, 30, 51, 7])\n",
      "\n",
      "# seed type\n",
      "x1 = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])\n",
      "\n",
      "# root type\n",
      "x2 = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])\n",
      "\n",
      "# number of plates\n",
      "N =  x1.shape[0]\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "### model - major differences from PyMC2:\n",
      "###   * with model idiom\n",
      "###   * initial values are not specified when constructing stochastics\n",
      "###   * keyword `shape` instead of `size`\n",
      "###   * Theano tensors instead of Lambda deterministics\n",
      "###   * `observed` parameter takes a value instead of a boolean\n",
      "\n",
      "with pm.Model() as m:\n",
      "    ### hyperpriors\n",
      "    tau = pm.Gamma('tau', 1.e-3, 1.e-3)\n",
      "    sigma = tau**-.5\n",
      "    \n",
      "    ### parameters\n",
      "    # fixed effects\n",
      "    alpha_0 = pm.Normal('alpha_0', 0., 1e-6)\n",
      "    alpha_1 = pm.Normal('alpha_1', 0., 1e-6) \n",
      "    alpha_2 = pm.Normal('alpha_2', 0., 1e-6) \n",
      "    alpha_12 = pm.Normal('alpha_12', 0., 1e-6)  \n",
      "    \n",
      "    # random effect\n",
      "    b = pm.Normal('b', 0., tau,  shape=(N,))\n",
      "    \n",
      "    # expected parameter\n",
      "    logit_p =  (alpha_0 + alpha_1*x1 + alpha_2*x2 + alpha_12*x1*x2 + b)\n",
      "    p = T.exp(logit_p) / (1 + T.exp(logit_p))\n",
      "    \n",
      "    ### likelihood\n",
      "    obs = pm.Binomial('obs', n, p, observed=r)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "WARNING (theano.gof.compilelock): Overriding existing lock by dead process '3067' (I am process '3935')\n"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%%time\n",
      "\n",
      "### inference - major differences from PyMC2:\n",
      "###   * find_MAP function instead of MAP object\n",
      "###   * initial values specified in inference functions\n",
      "###   * selection of MCMC step methods is more explicit\n",
      "###   * sampling from parallel chains is super-simple\n",
      "\n",
      "n = 2000\n",
      "\n",
      "with m:\n",
      "    start = pm.find_MAP({'tau': 10., 'alpha_0': 0., 'alpha_1': 0., 'alpha_2': 0., 'alpha_12': 0., 'b': np.zeros(N)})\n",
      "    step = pm.HamiltonianMC(scaling=start)\n",
      "    \n",
      "    ptrace = pm.psample(n, step, start, progressbar=False, threads=4)\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stderr",
       "text": [
        "/homes/abie/anaconda/lib/python2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/scan_module/scan_perform_ext.py:85: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility\n",
        "  from scan_perform.scan_perform import *\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "CPU times: user 1min 3s, sys: 10.2 s, total: 1min 13s\n",
        "Wall time: 1min 55s\n"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "BUGS results:\n",
      "\n",
      "A burn in of 1000 updates followed by a further 10000 updates gave the following parameter estimates:\n",
      "\n",
      "               mean    sd    \n",
      "     alpha_0   -0.55   0.19  \n",
      "     alpha_1    0.08   0.30  \n",
      "     alpha_12  -0.82   0.41  \n",
      "     alpha_2    1.35   0.26  \n",
      "     sigma      0.27   0.15  \n"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "burn = 1000\n",
      "pm.summary(ptrace[burn:])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "tau:\n",
        " \n",
        "  Mean             SD               MC Error         95% HPD interval\n",
        "  -------------------------------------------------------------------\n",
        "  1106.713         660.327          38.079           [368.521, 2503.479]\n",
        "\n",
        "  Posterior quantiles:\n",
        "  2.5            25             50             75             97.5\n",
        "  |--------------|==============|==============|--------------|\n",
        "  411.338        648.379        950.014        1300.858       3030.646\n",
        "\n",
        "\n",
        "alpha_0:\n",
        " \n",
        "  Mean             SD               MC Error         95% HPD interval\n",
        "  -------------------------------------------------------------------\n",
        "  -0.574           0.127            0.009            [-0.814, -0.292]\n",
        "\n",
        "  Posterior quantiles:\n",
        "  2.5            25             50             75             97.5\n",
        "  |--------------|==============|==============|--------------|\n",
        "  -0.885         -0.647         -0.567         -0.478         -0.342\n",
        "\n",
        "\n",
        "alpha_1:\n",
        " \n",
        "  Mean             SD               MC Error         95% HPD interval\n",
        "  -------------------------------------------------------------------\n",
        "  0.175            0.219            0.014            [-0.276, 0.599]\n",
        "\n",
        "  Posterior quantiles:\n",
        "  2.5            25             50             75             97.5\n",
        "  |--------------|==============|==============|--------------|\n",
        "  -0.282         0.040          0.175          0.328          0.597\n",
        "\n",
        "\n",
        "alpha_2:\n",
        " \n",
        "  Mean             SD               MC Error         95% HPD interval\n",
        "  -------------------------------------------------------------------\n",
        "  1.353            0.167            0.010            [0.984, 1.627]\n",
        "\n",
        "  Posterior quantiles:\n",
        "  2.5            25             50             75             97.5\n",
        "  |--------------|==============|==============|--------------|\n",
        "  1.003          1.242          1.351          1.468          1.671\n",
        "\n",
        "\n",
        "alpha_12:\n",
        " \n",
        "  Mean             SD               MC Error         95% HPD interval\n",
        "  -------------------------------------------------------------------\n",
        "  -0.836           0.305            0.020            [-1.413, -0.226]\n",
        "\n",
        "  Posterior quantiles:\n",
        "  2.5            25             50             75             97.5\n",
        "  |--------------|==============|==============|--------------|\n",
        "  -1.413         -1.048         -0.851         -0.631         -0.226\n",
        "\n",
        "\n",
        "b:\n",
        " \n",
        "  Mean             SD               MC Error         95% HPD interval\n",
        "  -------------------------------------------------------------------\n",
        "  -0.000           0.030            0.002            [-0.060, 0.063]\n",
        "  -0.002           0.032            0.002            [-0.058, 0.065]\n",
        "  -0.008           0.030            0.002            [-0.070, 0.047]\n",
        "  0.008            0.032            0.002            [-0.051, 0.068]\n",
        "  0.010            0.032            0.002            [-0.044, 0.073]\n",
        "  0.003            0.034            0.003            [-0.069, 0.064]\n",
        "  -0.001           0.032            0.002            [-0.061, 0.069]\n",
        "  0.009            0.034            0.003            [-0.058, 0.086]\n",
        "  -0.001           0.034            0.003            [-0.058, 0.067]\n",
        "  -0.012           0.031            0.002            [-0.068, 0.053]\n",
        "  0.003            0.030            0.002            [-0.062, 0.056]\n",
        "  0.001            0.030            0.002            [-0.053, 0.063]\n",
        "  0.002            0.032            0.002            [-0.055, 0.070]\n",
        "  -0.006           0.025            0.002            [-0.057, 0.039]\n",
        "  0.003            0.031            0.002            [-0.055, 0.063]\n",
        "  0.002            0.035            0.003            [-0.052, 0.085]\n",
        "  -0.002           0.032            0.002            [-0.069, 0.052]\n",
        "  -0.001           0.033            0.003            [-0.062, 0.067]\n",
        "  -0.003           0.032            0.002            [-0.066, 0.056]\n",
        "  0.003            0.031            0.002            [-0.063, 0.062]\n",
        "  0.003            0.033            0.002            [-0.062, 0.058]\n"
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\n",
        "  Posterior quantiles:\n",
        "  2.5            25             50             75             97.5\n",
        "  |--------------|==============|==============|--------------|\n",
        "  -0.066         -0.021         -0.000         0.018          0.062\n",
        "  -0.085         -0.020         -0.003         0.019          0.060\n",
        "  -0.070         -0.029         -0.006         0.013          0.049\n",
        "  -0.050         -0.014         0.005          0.029          0.073\n",
        "  -0.059         -0.011         0.012          0.033          0.065\n",
        "  -0.069         -0.022         0.003          0.023          0.072\n",
        "  -0.082         -0.017         0.001          0.019          0.057\n",
        "  -0.073         -0.011         0.012          0.027          0.079\n",
        "  -0.064         -0.024         -0.001         0.023          0.067\n",
        "  -0.080         -0.035         -0.011         0.010          0.047\n",
        "  -0.062         -0.017         0.004          0.022          0.059\n",
        "  -0.060         -0.020         0.003          0.024          0.058\n",
        "  -0.055         -0.017         -0.002         0.020          0.070\n",
        "  -0.053         -0.022         -0.004         0.010          0.048\n",
        "  -0.057         -0.019         0.004          0.025          0.063\n",
        "  -0.071         -0.019         0.000          0.021          0.085\n",
        "  -0.069         -0.021         -0.003         0.022          0.061\n",
        "  -0.069         -0.024         0.001          0.020          0.065\n",
        "  -0.067         -0.025         -0.001         0.018          0.056\n",
        "  -0.062         -0.017         0.002          0.022          0.070\n",
        "  -0.062         -0.018         0.003          0.026          0.058\n",
        "\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Gelman-Rubin convergence diagnostics:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "pm.gelman_rubin(ptrace[burn:])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 6,
       "text": [
        "{'alpha_0': 1.0194130588867218,\n",
        " 'alpha_1': 1.0042532712404557,\n",
        " 'alpha_12': 1.0041204872403353,\n",
        " 'alpha_2': 1.0037001462197992,\n",
        " 'b': array([ 1.01304092,  1.01520162,  1.01626442,  1.03409233,  1.01195989,\n",
        "         1.01161977,  1.06471208,  1.00517131,  1.04701133,  1.01351903,\n",
        "         1.02305173,  1.01105302,  1.00656117,  1.00287305,  1.04569727,\n",
        "         1.0372103 ,  1.02239126,  1.01456888,  1.01328597,  1.04200797,\n",
        "         1.09234026]),\n",
        " 'tau': 1.0064134954958157}"
       ]
      }
     ],
     "prompt_number": 6
    }
   ],
   "metadata": {}
  }
 ]
}