{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Seeds: Random effect logistic regression\n", "\n", "Ported to PyMC3 by Abraham Flaxman, 2/16/2014 from [PyMC2 version](http://nbviewer.ipython.org/github/aflaxman/pymc-examples/blob/master/seeds_re_logistic_regression.ipynb), based on http://www.openbugs.info/Examples/Seeds.html\n", "\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import numpy as np, pymc as pm, theano.tensor as T" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Warning: statsmodels and/or patsy not found, not importing glm submodule.\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "### data - same as PyMC2 version\n", "# germinated seeds\n", "r = np.array([10, 23, 23, 26, 17, 5, 53, 55, 32, 46, 10, 8, 10, 8, 23, 0, 3, 22, 15, 32, 3])\n", "\n", "# total seeds\n", "n = np.array([39, 62, 81, 51, 39, 6, 74, 72, 51, 79, 13, 16, 30, 28, 45, 4, 12, 41, 30, 51, 7])\n", "\n", "# seed type\n", "x1 = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])\n", "\n", "# root type\n", "x2 = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1])\n", "\n", "# number of plates\n", "N = x1.shape[0]\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "### model - major differences from PyMC2:\n", "### * with model idiom\n", "### * initial values are not specified when constructing stochastics\n", "### * keyword `shape` instead of `size`\n", "### * Theano tensors instead of Lambda deterministics\n", "### * `observed` parameter takes a value instead of a boolean\n", "\n", "with pm.Model() as m:\n", " ### hyperpriors\n", " tau = pm.Gamma('tau', 1.e-3, 1.e-3)\n", " sigma = tau**-.5\n", " \n", " ### parameters\n", " # fixed effects\n", " alpha_0 = pm.Normal('alpha_0', 0., 1e-6)\n", " alpha_1 = pm.Normal('alpha_1', 0., 1e-6) \n", " alpha_2 = pm.Normal('alpha_2', 0., 1e-6) \n", " alpha_12 = pm.Normal('alpha_12', 0., 1e-6) \n", " \n", " # random effect\n", " b = pm.Normal('b', 0., tau, shape=(N,))\n", " \n", " # expected parameter\n", " logit_p = (alpha_0 + alpha_1*x1 + alpha_2*x2 + alpha_12*x1*x2 + b)\n", " p = T.exp(logit_p) / (1 + T.exp(logit_p))\n", " \n", " ### likelihood\n", " obs = pm.Binomial('obs', n, p, observed=r)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "WARNING (theano.gof.compilelock): Overriding existing lock by dead process '3067' (I am process '3935')\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "%%time\n", "\n", "### inference - major differences from PyMC2:\n", "### * find_MAP function instead of MAP object\n", "### * initial values specified in inference functions\n", "### * selection of MCMC step methods is more explicit\n", "### * sampling from parallel chains is super-simple\n", "\n", "n = 2000\n", "\n", "with m:\n", " start = pm.find_MAP({'tau': 10., 'alpha_0': 0., 'alpha_1': 0., 'alpha_2': 0., 'alpha_12': 0., 'b': np.zeros(N)})\n", " step = pm.HamiltonianMC(scaling=start)\n", " \n", " ptrace = pm.psample(n, step, start, progressbar=False, threads=4)\n" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ "/homes/abie/anaconda/lib/python2.7/site-packages/Theano-0.6.0-py2.7.egg/theano/scan_module/scan_perform_ext.py:85: RuntimeWarning: numpy.ndarray size changed, may indicate binary incompatibility\n", " from scan_perform.scan_perform import *\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "CPU times: user 1min 3s, sys: 10.2 s, total: 1min 13s\n", "Wall time: 1min 55s\n" ] } ], "prompt_number": 4 }, { "cell_type": "markdown", "metadata": {}, "source": [ "BUGS results:\n", "\n", "A burn in of 1000 updates followed by a further 10000 updates gave the following parameter estimates:\n", "\n", " mean sd \n", " alpha_0 -0.55 0.19 \n", " alpha_1 0.08 0.30 \n", " alpha_12 -0.82 0.41 \n", " alpha_2 1.35 0.26 \n", " sigma 0.27 0.15 \n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "burn = 1000\n", "pm.summary(ptrace[burn:])" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "tau:\n", " \n", " Mean SD MC Error 95% HPD interval\n", " -------------------------------------------------------------------\n", " 1106.713 660.327 38.079 [368.521, 2503.479]\n", "\n", " Posterior quantiles:\n", " 2.5 25 50 75 97.5\n", " |--------------|==============|==============|--------------|\n", " 411.338 648.379 950.014 1300.858 3030.646\n", "\n", "\n", "alpha_0:\n", " \n", " Mean SD MC Error 95% HPD interval\n", " -------------------------------------------------------------------\n", " -0.574 0.127 0.009 [-0.814, -0.292]\n", "\n", " Posterior quantiles:\n", " 2.5 25 50 75 97.5\n", " |--------------|==============|==============|--------------|\n", " -0.885 -0.647 -0.567 -0.478 -0.342\n", "\n", "\n", "alpha_1:\n", " \n", " Mean SD MC Error 95% HPD interval\n", " -------------------------------------------------------------------\n", " 0.175 0.219 0.014 [-0.276, 0.599]\n", "\n", " Posterior quantiles:\n", " 2.5 25 50 75 97.5\n", " |--------------|==============|==============|--------------|\n", " -0.282 0.040 0.175 0.328 0.597\n", "\n", "\n", "alpha_2:\n", " \n", " Mean SD MC Error 95% HPD interval\n", " -------------------------------------------------------------------\n", " 1.353 0.167 0.010 [0.984, 1.627]\n", "\n", " Posterior quantiles:\n", " 2.5 25 50 75 97.5\n", " |--------------|==============|==============|--------------|\n", " 1.003 1.242 1.351 1.468 1.671\n", "\n", "\n", "alpha_12:\n", " \n", " Mean SD MC Error 95% HPD interval\n", " -------------------------------------------------------------------\n", " -0.836 0.305 0.020 [-1.413, -0.226]\n", "\n", " Posterior quantiles:\n", " 2.5 25 50 75 97.5\n", " |--------------|==============|==============|--------------|\n", " -1.413 -1.048 -0.851 -0.631 -0.226\n", "\n", "\n", "b:\n", " \n", " Mean SD MC Error 95% HPD interval\n", " -------------------------------------------------------------------\n", " -0.000 0.030 0.002 [-0.060, 0.063]\n", " -0.002 0.032 0.002 [-0.058, 0.065]\n", " -0.008 0.030 0.002 [-0.070, 0.047]\n", " 0.008 0.032 0.002 [-0.051, 0.068]\n", " 0.010 0.032 0.002 [-0.044, 0.073]\n", " 0.003 0.034 0.003 [-0.069, 0.064]\n", " -0.001 0.032 0.002 [-0.061, 0.069]\n", " 0.009 0.034 0.003 [-0.058, 0.086]\n", " -0.001 0.034 0.003 [-0.058, 0.067]\n", " -0.012 0.031 0.002 [-0.068, 0.053]\n", " 0.003 0.030 0.002 [-0.062, 0.056]\n", " 0.001 0.030 0.002 [-0.053, 0.063]\n", " 0.002 0.032 0.002 [-0.055, 0.070]\n", " -0.006 0.025 0.002 [-0.057, 0.039]\n", " 0.003 0.031 0.002 [-0.055, 0.063]\n", " 0.002 0.035 0.003 [-0.052, 0.085]\n", " -0.002 0.032 0.002 [-0.069, 0.052]\n", " -0.001 0.033 0.003 [-0.062, 0.067]\n", " -0.003 0.032 0.002 [-0.066, 0.056]\n", " 0.003 0.031 0.002 [-0.063, 0.062]\n", " 0.003 0.033 0.002 [-0.062, 0.058]\n" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", " Posterior quantiles:\n", " 2.5 25 50 75 97.5\n", " |--------------|==============|==============|--------------|\n", " -0.066 -0.021 -0.000 0.018 0.062\n", " -0.085 -0.020 -0.003 0.019 0.060\n", " -0.070 -0.029 -0.006 0.013 0.049\n", " -0.050 -0.014 0.005 0.029 0.073\n", " -0.059 -0.011 0.012 0.033 0.065\n", " -0.069 -0.022 0.003 0.023 0.072\n", " -0.082 -0.017 0.001 0.019 0.057\n", " -0.073 -0.011 0.012 0.027 0.079\n", " -0.064 -0.024 -0.001 0.023 0.067\n", " -0.080 -0.035 -0.011 0.010 0.047\n", " -0.062 -0.017 0.004 0.022 0.059\n", " -0.060 -0.020 0.003 0.024 0.058\n", " -0.055 -0.017 -0.002 0.020 0.070\n", " -0.053 -0.022 -0.004 0.010 0.048\n", " -0.057 -0.019 0.004 0.025 0.063\n", " -0.071 -0.019 0.000 0.021 0.085\n", " -0.069 -0.021 -0.003 0.022 0.061\n", " -0.069 -0.024 0.001 0.020 0.065\n", " -0.067 -0.025 -0.001 0.018 0.056\n", " -0.062 -0.017 0.002 0.022 0.070\n", " -0.062 -0.018 0.003 0.026 0.058\n", "\n" ] } ], "prompt_number": 5 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Gelman-Rubin convergence diagnostics:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pm.gelman_rubin(ptrace[burn:])" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 6, "text": [ "{'alpha_0': 1.0194130588867218,\n", " 'alpha_1': 1.0042532712404557,\n", " 'alpha_12': 1.0041204872403353,\n", " 'alpha_2': 1.0037001462197992,\n", " 'b': array([ 1.01304092, 1.01520162, 1.01626442, 1.03409233, 1.01195989,\n", " 1.01161977, 1.06471208, 1.00517131, 1.04701133, 1.01351903,\n", " 1.02305173, 1.01105302, 1.00656117, 1.00287305, 1.04569727,\n", " 1.0372103 , 1.02239126, 1.01456888, 1.01328597, 1.04200797,\n", " 1.09234026]),\n", " 'tau': 1.0064134954958157}" ] } ], "prompt_number": 6 } ], "metadata": {} } ] }