{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Exploration of a problem interpreting binary test results\n",
    "\n",
    "Copyright 2015 Allen Downey\n",
    "MIT License"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function, division\n",
    "\n",
    "import thinkbayes2\n",
    "\n",
    "from sympy import symbols"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`p` is the prevalence of a condition\n",
    "`s` is the sensititivity of the test\n",
    "\n",
    "The false positive rate is known to be either `t1` (with probability `q`) or `t2` (with probability `1-q`)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "p, q, s, t1, t2 = symbols('p q s t1 t2')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "I'll use `a` through `h` for each of the 8 possible conditions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "a, b, c, d, e, f, g, h = symbols('a b c d e f g h')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And here are the probabilities of the conditions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "a =   q   *   p   *   s\n",
    "b =   q   *   p   * (1-s)\n",
    "c =   q   * (1-p) *   t1\n",
    "d =   q   * (1-p) * (1-t1)\n",
    "e = (1-q) *   p   *   s\n",
    "f = (1-q) *   p   * (1-s)\n",
    "g = (1-q) * (1-p) *   t2\n",
    "h = (1-q) * (1-p) * (1-t2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pmf({'notsick': t1*(-p + 1), 'sick': p*s})"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf1 = thinkbayes2.Pmf()\n",
    "pmf1['sick'] = p*s\n",
    "pmf1['notsick'] = (1-p)*t1\n",
    "pmf1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "p*s - t1*(p - 1)"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nc1 = pmf1.Normalize()\n",
    "nc1.simplify()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pmf({'notsick': t2*(-p + 1), 'sick': p*s})"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf2 = thinkbayes2.Pmf()\n",
    "pmf2['sick'] = p*s\n",
    "pmf2['notsick'] = (1-p)*t2\n",
    "pmf2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "p*s - t2*(p - 1)"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nc2 = pmf2.Normalize()\n",
    "nc2.simplify()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0*q*(p*s + t1*(-p + 1)) + (-1.0*q + 1.0)*(p*s + t2*(-p + 1))"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf_t = thinkbayes2.Pmf({t1:q, t2:1-q})\n",
    "pmf_t[t1] *= nc1\n",
    "pmf_t[t2] *= nc2\n",
    "pmf_t.Normalize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0*(q*t1*(p*s - t1*(p - 1)) - t2*(q - 1)*(p*s - t2*(p - 1)))/(q*(p*s - t1*(p - 1)) - (q - 1)*(p*s - t2*(p - 1)))"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf_t.Mean().simplify()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.662000000000000"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d1 = dict(q=0.5, p=0.1, s=0.5, t1=0.2, t2=0.8)\n",
    "pmf_t.Mean().evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.554000000000000"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d2 = dict(q=0.75, p=0.1, s=0.5, t1=0.4, t2=0.8)\n",
    "pmf_t.Mean().evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.615000000000000"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf_t[t1].evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0*p*s/(q*(p*s - t1*(p - 1)) - (q - 1)*(p*s - t2*(p - 1)))"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = pmf_t[t1] * pmf1['sick'] + pmf_t[t2] * pmf2['sick']\n",
    "x.simplify()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.100000000000000"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.100000000000000"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pmf({'notsick': (-p + 1)*(q*t1 + t2*(-q + 1)), 'sick': p*s})"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t = q * t1 + (1-q) * t2\n",
    "\n",
    "pmf = thinkbayes2.Pmf()\n",
    "pmf['sick'] = p*s\n",
    "pmf['notsick'] = (1-p)*t\n",
    "pmf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "p*s + (-p + 1)*(q*t1 + t2*(-q + 1))"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf.Normalize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0*p*s/(p*s - (p - 1)*(q*t1 - t2*(q - 1)))"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf['sick'].simplify()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.100000000000000"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf['sick'].evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.100000000000000"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf['sick'].evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "p**2*q*s**2 + p**2*s**2*(-q + 1) + 2*p*q*s*t1*(-p + 1) + p*s*t2*(-p + 1)*(-2*q + 2) + q*t1**2*(-p + 1)**2 + t2**2*(-p + 1)**2*(-q + 1)"
      ]
     },
     "execution_count": 111,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gold = thinkbayes2.Pmf()\n",
    "gold['0 sick t1'] = q * (1-p)**2 * t1**2\n",
    "gold['1 sick t1'] = q * 2*p*(1-p) * s * t1\n",
    "gold['2 sick t1'] = q * p**2 * s**2\n",
    "gold['0 sick t2'] = (1-q) * (1-p)**2 * t2**2\n",
    "gold['1 sick t2'] = (1-q) * 2*p*(1-p) * s * t2\n",
    "gold['2 sick t2'] = (1-q) * p**2 * s**2\n",
    "gold.Normalize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.852895633323010"
      ]
     },
     "execution_count": 112,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p0 = gold['0 sick t1'] + gold['0 sick t2'] \n",
    "p0.evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.826831935836675"
      ]
     },
     "execution_count": 113,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p0.evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "t = q * t1 + (1-q) * t2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "collapsed = thinkbayes2.Pmf()\n",
    "collapsed['0 sick'] = (1-p)**2 * t**2\n",
    "collapsed['1 sick'] = 2*p*(1-p) * s * t\n",
    "collapsed['2 sick'] = p**2 * s**2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "p**2*s**2 + 2*p*s*(-p + 1)*(q*t1 + t2*(-q + 1)) + (-p + 1)**2*(q*t1 + t2*(-q + 1))**2"
      ]
     },
     "execution_count": 116,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "collapsed.Normalize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.810000000000000"
      ]
     },
     "execution_count": 117,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "collapsed['0 sick'].evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.810000000000000"
      ]
     },
     "execution_count": 118,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "collapsed['0 sick'].evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pmf1 = thinkbayes2.Pmf()\n",
    "pmf1['0 sick'] = (1-p)**2 * t1**2\n",
    "pmf1['1 sick'] = 2*p*(1-p) * s * t1\n",
    "pmf1['2 sick'] = p**2 * s**2\n",
    "nc1 = pmf1.Normalize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pmf2 = thinkbayes2.Pmf()\n",
    "pmf2['0 sick'] = (1-p)**2 * t2**2\n",
    "pmf2['1 sick'] = 2*p*(1-p) * s * t2\n",
    "pmf2['2 sick'] = p**2 * s**2\n",
    "nc2 = pmf2.Normalize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0*q*(p**2*s**2 + 2*p*s*t1*(-p + 1) + t1**2*(-p + 1)**2) + (-1.0*q + 1.0)*(p**2*s**2 + 2*p*s*t2*(-p + 1) + t2**2*(-p + 1)**2)"
      ]
     },
     "execution_count": 121,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf_t = thinkbayes2.Pmf({t1:q, t2:1-q})\n",
    "pmf_t[t1] *= nc1\n",
    "pmf_t[t2] *= nc2\n",
    "pmf_t.Normalize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0*(p - 1)**2*(q*t1**2 + t2**2*(-q + 1))/(q*(p**2*s**2 - 2*p*s*t1*(p - 1) + t1**2*(p - 1)**2) - (q - 1)*(p**2*s**2 - 2*p*s*t2*(p - 1) + t2**2*(p - 1)**2))"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = pmf_t[t1] * pmf1['0 sick'] + pmf_t[t2] * pmf2['0 sick']\n",
    "x.simplify()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.852895633323010, 0.852895633323010)"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.evalf(subs=d1), p0.evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.826831935836675, 0.826831935836675)"
      ]
     },
     "execution_count": 124,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.evalf(subs=d2), p0.evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`pmf_t` represents the distribution of `t`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0*q*t1 - 1.0*t2*(q - 1)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf_t = thinkbayes2.Pmf({t1:q, t2:1-q})\n",
    "pmf_t.Mean().simplify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "I'll consider two sets of parameters, `d1` and `d2`, which have the same mean value of `t`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.500000000000000"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d1 = dict(q=0.5, p=0.1, s=0.5, t1=0.2, t2=0.8)\n",
    "pmf_t.Mean().evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.500000000000000"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d2 = dict(q=0.75, p=0.1, s=0.5, t1=0.4, t2=0.8)\n",
    "pmf_t.Mean().evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`prob` takes two numbers that represent odds in favor and returns the corresponding probability."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def prob(yes, no):\n",
    "    return yes / (yes + no)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Scenario A\n",
    "\n",
    "In the first scenario, there are two kinds of people in the world, or two kinds of tests, so there are four outcomes that yield positive tests: two true positives (a and d) and two false positives (c and g).\n",
    "\n",
    "We can compute the probability of a true positive given a positive test:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "p*s/(-p*q*t1 + p*q*t2 + p*s - p*t2 + q*t1 - q*t2 + t2)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res = prob(a+e, c+g)\n",
    "res.simplify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this scenario, the two parameter sets yield the same answer:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.100000000000000"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res.evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.100000000000000"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "res.evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Scenario B\n",
    "\n",
    "Now suppose instead of two kinds of people, or two kinds of tests, the distribution of `t` represents our uncertainty about `t`.  That is, we are only considering one test, and we think the false positive rate is the same for everyone, but we don't know what it is.\n",
    "\n",
    "In this scenario, we need to think about the sampling process that brings patients to see doctors.  There are three possibilities:\n",
    "\n",
    "B1. Only patients who test positive see a doctor.\n",
    "\n",
    "B2. All patients see a doctor with equal probability, regardless of test results and regardless of whether they are sick or not.\n",
    "\n",
    "B3.  Patients are more or less likely to see a doctor, depending on the test results and whether they are sick or not."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Scenario B1\n",
    "\n",
    "If patients only see a doctor after testing positive, the doctor doesn't learn anything about `t` just because a patient tests positive.  In that case, the doctor should compute the conditional probabilities:\n",
    "\n",
    "*  `p1` is the probability the patient is sick given a positive test and `t1`\n",
    "*  `p2` is the probability the patient is sick given a positive test and `t2`\n",
    "\n",
    "We can compute `p1` and `p2`, form `pmf_p`, and compute its mean:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "p*s/(p*s - t1*(p - 1))"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p1 = prob(a, c)\n",
    "p1.simplify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.217391304347826"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p1.evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "p*s/(p*s - t2*(p - 1))"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p2 = prob(e, g)\n",
    "p2.simplify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0649350649350649"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p2.evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "pmf_p = thinkbayes2.Pmf([p1, p2])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5*p*s*(2*p*s - t1*(p - 1) - t2*(p - 1))/((p*s - t1*(p - 1))*(p*s - t2*(p - 1)))"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf_p.Mean().simplify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.141163184641446"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pmf_p.Mean().evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.121951219512195, 0.0649350649350649, 0.0934431422236300)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "p1.evalf(subs=d2), p2.evalf(subs=d2), pmf_p.Mean().evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Scenario B2\n",
    "\n",
    "If all patients see a doctor, the doctor can learn about `t` based on the number of positive and negative tests.\n",
    "\n",
    "* The likelihood of a positive test given `t1` is `(a+c)/q`\n",
    "\n",
    "* The likelihood of a positive test given `t2` is `(e+g)/(1-q)`\n",
    "\n",
    "`update` takes a `pmf` and updates it with these likelihoods"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def update(pmf):\n",
    "    post = pmf.Copy()\n",
    "    post[p1] *= (a + c) / q\n",
    "    post[p2] *= (e + g) / (1-q)\n",
    "    post.Normalize()\n",
    "    return post"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`post` is what we should believe about `p` after seeing one patient with a positive test:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-0.5*p*s + 0.5*t1*(p - 1))/(-1.0*p*s + 0.5*t1*(p - 1) + 0.5*t2*(p - 1))"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "post = update(pmf_p)\n",
    "post[p1].simplify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-1.0*p*s/(-1.0*p*s + 0.5*t1*(p - 1) + 0.5*t2*(p - 1))"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "post.Mean().simplify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "When `q` is 0.5, the posterior mean is `p`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.100000000000000"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "post.Mean().evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "But other distributions of `t` yield different values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0847457627118644"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "post.Mean().evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's see what we get after seeing two patients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "post2 = update(post)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0*p*s*(2*p*s - t1*(p - 1) - t2*(p - 1))/((p*s - t1*(p - 1))**2 + (p*s - t2*(p - 1))**2)"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "post2.Mean().simplify()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Positive tests are more likely under `t2` than `t1`, so each positive test makes it more likely that `t=t2`.  So the expected value of `p` converges on `p2`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0774233508826262"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "post2.Mean().evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0775295663600526"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "post2.Mean().evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0688926818860678"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "post3 = update(post2)\n",
    "post3.Mean().evalf(subs=d1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0724135699794844"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "post3.Mean().evalf(subs=d2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}