{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/local/cs/local/install/anaconda3-5.3.1-Linux-x86_64/envs/py36ds/lib/python3.6/site-packages/dask/config.py:168: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", " data = yaml.load(f.read()) or {}\n", "/home/local/cs/local/install/anaconda3-5.3.1-Linux-x86_64/envs/py36ds/lib/python3.6/site-packages/distributed/config.py:20: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.\n", " defaults = yaml.load(f)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "cs224 \n", "last updated: 2020-03-07 \n", "\n", "CPython 3.6.8\n", "IPython 7.3.0\n", "\n", "numpy 1.16.2\n", "xarray 0.11.3\n", "scipy 1.2.1\n", "pandas 0.24.2\n", "sklearn 0.20.3\n", "matplotlib 3.0.3\n", "seaborn 0.9.0\n", "pymc3 3.8\n" ] } ], "source": [ "%load_ext watermark\n", "%watermark -a 'cs224' -u -d -v -p numpy,xarray,scipy,pandas,sklearn,matplotlib,seaborn,pymc3" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import numpy as np, scipy, scipy.stats as stats, scipy.special, scipy.misc, pandas as pd, matplotlib.pyplot as plt, seaborn as sns, xarray as xr\n", "import matplotlib as mpl\n", "\n", "import pymc3 as pm\n", "\n", "import theano as thno\n", "import theano.tensor as T\n", "\n", "import datetime, time, math\n", "from dateutil import relativedelta\n", "\n", "from collections import OrderedDict\n", "\n", "SEED = 42\n", "np.random.seed(SEED)\n", "\n", "pd.set_option('display.max_columns', 500)\n", "pd.set_option('display.width', 1000)\n", "# pd.set_option('display.float_format', lambda x: '%.2f' % x)\n", "np.set_printoptions(edgeitems=10)\n", "np.set_printoptions(linewidth=1000)\n", "np.set_printoptions(suppress=True)\n", "np.core.arrayprint._line_width = 180\n", "\n", "sns.set()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from IPython.display import display, HTML\n", "\n", "from IPython.display import display_html\n", "def display_side_by_side(*args):\n", " html_str=''\n", " for df in args:\n", " if type(df) == np.ndarray:\n", " df = pd.DataFrame(df)\n", " html_str+=df.to_html()\n", " html_str = html_str.replace('table','table style=\"display:inline\"')\n", " # print(html_str)\n", " display_html(html_str,raw=True)\n", "\n", "CSS = \"\"\"\n", ".output {\n", " flex-direction: row;\n", "}\n", "\"\"\"\n", "\n", "def display_graphs_side_by_side(*args):\n", " html_str=''\n", " for g in args:\n", " html_str += ''\n", " html_str += '
'\n", " html_str += g._repr_svg_()\n", " html_str += '
'\n", " display_html(html_str,raw=True)\n", " \n", "\n", "display(HTML(\"\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Model Comparison via Bayes Factor\n", "\n", "* [Doing Bayesian Data Analysis: A Tutorial with R, JAGS, and Stan](https://www.amazon.de/Doing-Bayesian-Data-Analysis-Tutorial/dp/0124058884) p. 268" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# tail-biased factory\n", "omega1, kappa1 = 0.25, 12" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# head-biased factory\n", "omega2, kappa2 = 0.75, 12" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Flip coin 9 times and get 6 heads\n", "samples = np.array([0,0,0,1,1,1,1,1,1])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def pD(z,N,a,b):\n", " #return scipy.special.beta(z+a, N-z+b)/scipy.special.beta(a,b)\n", " return np.exp(np.log(scipy.special.beta(z+a, N-z+b)) - np.log(scipy.special.beta(a,b)))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def ok2ab(omega, kappa):\n", " return omega*(kappa-2)+1,(1-omega)*(kappa-2)+1" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "a1,b1 = ok2ab(omega1,kappa1)\n", "a2,b2 = ok2ab(omega2,kappa2)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0004993438720703131" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p1 = pD(6,9,a1,b1)\n", "p1" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0023385561429537273" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p2 = pD(6,9,a2,b2)\n", "p2" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def fn(a, b):\n", " return lambda p: stats.bernoulli(p).pmf(samples).prod() * stats.beta(a,b).pdf(p)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.000102996826171875" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fn(1,1)(0.25)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.0004993438720702594, 2.322913347907592e-09)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scipy.integrate.quad(fn(a1,b1),0.0,1.0)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0004993438720703131" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p1" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.002338556142956198, 2.1998184481120156e-11)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scipy.integrate.quad(fn(a2,b2),0.0,1.0)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0023385561429537273" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Via PyMC3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* [Bayes 
Factors and Marginal Likelihood](https://docs.pymc.io/notebooks/Bayes_factor.html)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Sample initial stage: ...\n", "Stage: 0 Beta: 0.656 Steps: 100 Acce: 1.000\n", "Stage: 1 Beta: 1.000 Steps: 100 Acce: 0.704\n", "Sample initial stage: ...\n", "Stage: 0 Beta: 1.000 Steps: 100 Acce: 1.000\n" ] } ], "source": [ "# Flip coin 9 times and get 6 heads\n", "priors = ((a1, b1), (a2, b2))\n", "models = []\n", "traces = []\n", "for alpha, beta in priors:\n", " with pm.Model() as model:\n", " a = pm.Beta('a', alpha, beta)\n", " yl = pm.Bernoulli('yl', a, observed=samples)\n", " trace = pm.sample_smc(4000, n_steps=100, random_seed=42)\n", " models.append(model)\n", " traces.append(trace)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0004993438720703131" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p1" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "0.0004979784247939404" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "models[0].marginal_likelihood" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0023385561429537273" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p2" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.002364542043803754" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "models[1].marginal_likelihood" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4.748282106362706" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "BF_smc = models[1].marginal_likelihood / models[0].marginal_likelihood\n", "BF_smc" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4.683257918552034" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p2/p1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### PyMC3 log-likelihood at point-estimate\n", "\n", "* [General API quickstart](https://docs.pymc.io/notebooks/api_quickstart.html)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# taking as example data that is distributed like gauss(mu=-3.0, sigma=0.5):\n", "samples2 = stats.norm(loc=-3, scale=0.5).rvs(size=1000, random_state=np.random.RandomState(42))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-704.9307117016448" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# this is the log likelihood we expect to see from the point estimate:\n", "stats.norm(loc=-3, scale=0.5).logpdf(samples2).sum()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "with pm.Model() as mcmc_model:\n", " mcmc_model_m = pm.Uniform('mean', lower=-10., upper=10.) 
# , transform=None\n", " mcmc_model_s = pm.Uniform('s', lower=0.1, upper=5.0) # , transform=None\n", " mcmc_model_gs = pm.Normal('gs', mu=mcmc_model_m, sigma=mcmc_model_s, observed=samples2)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using adapt_diag...\n", "Multiprocess sampling (4 chains in 4 jobs)\n", "NUTS: [s, mean]\n", "Sampling 4 chains, 0 divergences: 100%|██████████| 12000/12000 [00:02<00:00, 4874.64draws/s]\n" ] } ], "source": [ "with mcmc_model:\n", " mcmc_model_trace = pm.sample(2000, tune=1000, init='adapt_diag')" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
meansdhpd_3%hpd_97%mcse_meanmcse_sdess_meaness_sdess_bulkess_tailr_hat
mean-2.990.016-3.019-2.9610.00.07452.07450.07447.05543.01.0
s0.490.0110.4710.5120.00.07633.07627.07636.06271.01.0
\n", "
" ], "text/plain": [ " mean sd hpd_3% hpd_97% mcse_mean mcse_sd ess_mean ess_sd ess_bulk ess_tail r_hat\n", "mean -2.99 0.016 -3.019 -2.961 0.0 0.0 7452.0 7450.0 7447.0 5543.0 1.0\n", "s 0.49 0.011 0.471 0.512 0.0 0.0 7633.0 7627.0 7636.0 6271.0 1.0" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pm.summary(mcmc_model_trace).round(3)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[mean_interval__, s_interval__]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcmc_model.free_RVs" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['mean_interval__', 's_interval__', 'mean', 's']" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcmc_model_trace.varnames" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/latex": [ "$\\text{mean} \\sim \\text{Uniform}(\\mathit{lower}=-10.0,~\\mathit{upper}=10.0)$" ], "text/plain": [ "mean" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcmc_model_m_d = mcmc_model.deterministics[0]\n", "mcmc_model_m_d" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(-9.05148253)" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcmc_model_m_d.transformation.backward(-3.0).eval()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(-0.61903921)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcmc_model_m_d.transformation.forward(-3.0).eval()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For our point-estimate the value of the `model.logp()` gives a different value than the dependent random variable's `mcmc_model_gs.logp()` (see next cell).\n", "The reason is that here, for the `mcmc_model.logp()`, we add the log-likelihood of the prior distributions, which make it more negative.\n", "In the next cell, for `mcmc_model_gs.logp()`, we treat the prior as an [improper prior](https://en.wikipedia.org/wiki/Prior_probability#Improper_priors)." 
] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(-709.00200049)" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcmc_model.logp({'mean_interval__': mcmc_model_m_d.transformation.forward(-3.0).eval(), 's_interval__': mcmc_model.deterministics[1].transformation.forward(0.5).eval()})" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(-704.9307117)" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcmc_model_gs.logp({'mean_interval__': mcmc_model_m_d.transformation.forward(-3.0).eval(), 's_interval__': mcmc_model.deterministics[1].transformation.forward(0.5).eval()})" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "import itertools" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'mean_interval__': -0.6169506292247524, 's_interval__': -2.4482681357926412}" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs])" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(-708.38320197)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcmc_model.logp(dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The value of the following cell differs slightly from above, because above we used the known $\\mu=-3.0$ and $\\sigma=0.5$ and\n", "in the following cell we use the estimated $\\mu=-2.99$ and $\\sigma=0.49$" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(-704.28913595)" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mcmc_model_gs.logp(dict([[varobj.name, mcmc_model_trace[varobj.name].mean()] for varobj in mcmc_model.free_RVs]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }