{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/downey/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", " from ._conv import register_converters as _register_converters\n" ] } ], "source": [ "from __future__ import print_function, division\n", "\n", "%matplotlib inline\n", "\n", "import numpy as np\n", "import pymc3 as pm\n", "import scipy\n", "import seaborn as sns\n", "\n", "import matplotlib.pyplot as plt\n", "\n", "import thinkbayes2\n", "import thinkplot" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I want to predict the number of goals scored in the next game, where\n", "\n", "`goals ~ Poisson(mu)`\n", "\n", "`mu ~ Gamma(alpha, beta)`\n", "\n", "Suppose my posterior distribution for `mu` has `alpha=10`, `beta=5`." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "alpha = 10\n", "beta = 5" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I can draw a sample from the posterior, and it has the mean I expect, `alpha/beta`" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.0014370180768606" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iters = 100000\n", "sample_mu = np.random.gamma(shape=alpha, scale=1/beta, size=iters)\n", "np.mean(sample_mu)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.0" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mu = alpha / beta\n", "mu" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I can sample from the predictive distribution by drawing one Poisson sample for each sampled value of `mu`, and it has the mean I expect." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.00996" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_pred = np.random.poisson(sample_mu)\n", "np.mean(sample_pred)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now I'll try to do the same thing with pymc3.\n", "\n", "Pretending that `mu` is a known constant, I can sample from `Poisson(mu)` and I get the mean I expect." 
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.00449" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = pm.Model()\n", "\n", "with model:\n", " goals = pm.Poisson('goals', mu)\n", " sample_pred_wrong_pm = goals.random(size=iters)\n", "\n", "np.mean(sample_pred_wrong_pm)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "And sampling from the posterior distribution of `mu`, I get the mean I expect." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.9981993583520818" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = pm.Model()\n", "\n", "with model:\n", " mu = pm.Gamma('mu', alpha, beta)\n", " sample_post_pm = mu.random(size=iters)\n", "\n", "np.mean(sample_post_pm)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "But if I try to sample from the posterior predictive distribution (at least in the way I expected it to work), I don't get the mean I expect." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.37646" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = pm.Model()\n", "\n", "with model:\n", " mu = pm.Gamma('mu', alpha, beta)\n", " goals = pm.Poisson('goals', mu)\n", " sample_pred_pm = goals.random(size=iters)\n", "\n", "np.mean(sample_pred_pm)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "It looks like it might be taking one sample from the Gamma distribution and using it to generate the entire sample of goals.\n", "\n", "I suspect something is wrong with my mental model of how to specify the model in pymc3." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 1 }