{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "from lets_plot import *\n", "\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from scipy.stats import binom, beta" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Beta-Binomial model\n", "\n", "$\\displaystyle \\boxed{\\begin{array}{l}\n", "X|\\theta\\sim B_n(\\theta) \\text{ - likelihood} \\\\\n", "\\theta \\sim B(\\alpha,\\beta) \\text{ - prior}\n", "\\end{array}\n", "\\Rightarrow \\theta|(X=x) \\sim B(\\alpha+x,\\beta+n-x) \\text{ - posterior}}$\n", " \n", "The model:\n", "* Likelihood - Binomial distribution: $\\displaystyle{\\mathbb{P}(X=x|\\theta)=\\frac{n!}{(n-k)!k!}\\theta^x(1-\\theta)^{n-x}}$\n", "* Prior - Beta distribution: $\\displaystyle{p(\\theta)=\\frac{\\Gamma(\\alpha+\\beta)}{\\Gamma(\\alpha)\\Gamma(\\beta)}\\theta^{\\alpha-1}(1-\\theta)^{\\beta-1}}$\n", "* Posterior - Beta distribution: $\\displaystyle{p(\\theta|X=x)\\sim B(\\alpha+x,\\beta+n-x)}$" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "class BetaBinomial(object):\n", "\n", " def __init__(self, prior_params=(1., 1.), dist=beta, x=np.linspace(0., 1., 100)):\n", " self.prior_params = prior_params\n", " self.dist = dist\n", " self.x = x\n", "\n", " def posterior(self, trials = None, observations = None):\n", " a_prior, b_prior = self.prior_params\n", " self.posterior_prob =[]\n", " self.ci = []\n", " for i, n in enumerate (trials):\n", " y = observations[i]\n", " post = self.dist(a_prior + y, b_prior + n - y)\n", " self.posterior_prob.append(post.pdf(self.x))\n", " self.ci.append(post.interval(0.95))\n", " return self.posterior_prob" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "a_prior = 1.\n", "b_prior = 1.\n", "n = 50\n", "prior_params = (a_prior, b_prior)\n", "bb = BetaBinomial(prior_params)\n", "true_theta = 0.25\n", "trials = np.arange(n)\n", "observations = np.array([binom(n=trials[i], p=true_theta).rvs(size=1)[0] for i in range(n)])\n", "posterior = bb.posterior(trials, observations)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "columns = ['x']\n", "columns.extend(['p_' + str(i) for i in np.arange(n)])\n", "post = columns[-1]\n", "prior = columns[1]\n", "data = pd.DataFrame(np.hstack((bb.x[:, np.newaxis], np.array(posterior).T)), columns=columns)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "data_melt = data.melt(id_vars='x')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p = ggplot(data_melt) \\\n", " + geom_area(aes(x='x', y='value', group='variable'), \n", " position='identity', \n", " size=0, alpha=0.1)\n", "p" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p += geom_vline(xintercept=true_theta, color='blue', linetype='dashed')\n", "p += geom_text(x=true_theta, y=14, label='Actual', color='blue', hjust=1)\n", "p += scale_y_continuous(limits=[0, 15], expand=[0, 0])\n", "p" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "where_mode = bb.x[np.where(posterior[n - 1] == np.max(posterior[n - 1]))[0][0]]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p += geom_vline(xintercept=where_mode, color='red', linetype='dashed')\n", "p += geom_text(x=where_mode, y=13, label='Posterior', color='red', hjust=1)\n", "p" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " " ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "p += geom_line(aes(x='x', y=post), data=data, color='red')\n", "p" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "