{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Here we proceed with the assumption that the number of good boxes is 3\n",
      "#\n",
      "# Every print() call receives a single pre-formatted string, so the cell is\n",
      "# valid on both Python 2 and Python 3 (a bare print statement is a syntax\n",
      "# error on Python 3). '%s' formats a float with str(), which is what the\n",
      "# Python 2 print statement uses as well.\n",
      "\n",
      "# Assume as a prior that there are 3 of 13 good boxes\n",
      "priorG = 3.0/13.0\n",
      "priorB = 1.0-priorG\n",
      "# 12 spaces separate the two columns on this line\n",
      "print('P( HG ) =  %s%sP( HB ) =  %s' % (priorG, ' '*12, priorB))\n",
      "\n",
      "# Calculate posteriors that box in hand is good box:\n",
      "# weight each prior by its factor (9/10 for HG, 2/10 for HB), then normalise\n",
      "piG = 9.0/10.0 * priorG\n",
      "piB = 2.0/10.0 * priorB\n",
      "evidence = piG + piB  # shared normaliser, computed once\n",
      "posteriorG = piG/evidence\n",
      "posteriorB = piB/evidence\n",
      "# 5 spaces separate the two columns on this line\n",
      "print('P( HG | data ) =  %s%sP( HB | data ) =  %s' % (posteriorG, ' '*5, posteriorB))\n",
      "print('')\n",
      "\n",
      "# Calculate probability next item is valuable:\n",
      "# 8/9 under HG and 1/9 under HB, averaged with the posterior weights\n",
      "p_valuable = 8.0/9.0 * posteriorG + 1.0/9.0 * posteriorB\n",
      "print('P( V | data ) =  %s' % p_valuable)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "P( HG ) =  0.230769230769            P( HB ) =  0.769230769231\n",
        "P( HG | data ) =  0.574468085106     P( HB | data ) =  0.425531914894\n",
        "\n",
        "P( V | data ) =  0.557919621749\n"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Here we allow for a distribution of priors for the number of good boxes\n",
      "\n",
      "def p_Hi_given_data(priors):\n",
      "    \"\"\"Return the normalised posterior over the hypotheses Hi.\n",
      "\n",
      "    priors[i] is the prior weight of Hi (index i = number of good boxes out\n",
      "    of 13); the weights do not need to sum to 1. Each weight is multiplied\n",
      "    by the factor 9/10 * i/13 + 2/10 * (13 - i)/13 and the products are\n",
      "    divided by their total, so a non-empty result sums to 1.\n",
      "\n",
      "    Returns a list with one entry per entry of priors ([] for empty input).\n",
      "    Raises ZeroDivisionError when the weighted terms sum to zero, e.g. for\n",
      "    all-zero priors.\n",
      "    \"\"\"\n",
      "    weighted = [(9.0/10.0*float(i)/13.0 + 2.0/10.0*(13.0-float(i))/13.0)*prior\n",
      "                for i, prior in enumerate(priors)]\n",
      "    # Sum once, outside the comprehension, instead of once per element.\n",
      "    total = sum(weighted)\n",
      "    return [term/total for term in weighted]\n",
      "\n",
      "def p_v_given_Hi_data(i):\n",
      "    \"\"\"Return P( V | Hi, data ) for hypothesis index i.\n",
      "\n",
      "    Two weights are formed, 9/10 * i/13 for HG and 2/10 * (13 - i)/13 for\n",
      "    HB, and normalised so they sum to 1. The result mixes 8/9 (HG) and\n",
      "    1/9 (HB) using those normalised weights.\n",
      "    \"\"\"\n",
      "    n_good = float(i)\n",
      "    weight_good = 9.0/10.0*n_good/13.0\n",
      "    weight_bad = 2.0/10.0*(13.0-n_good)/13.0\n",
      "    norm = weight_good + weight_bad  # common denominator for both weights\n",
      "\n",
      "    return 8.0/9.0*(weight_good/norm) + 1.0/9.0*(weight_bad/norm)\n",
      "    \n",
      "    \n",
      "def p_next_valuable(posteriors):\n",
      "    \"\"\"Return the sum over i of p_v_given_Hi_data(i) * posteriors[i].\"\"\"\n",
      "    return sum(p_v_given_Hi_data(n_good) * weight\n",
      "               for n_good, weight in enumerate(posteriors))\n",
      "\n",
      "# Each scenario pairs a heading with a prior over the number of good boxes.\n",
      "# priors[i] is the weight for i good boxes, i = 0..13, so every list has\n",
      "# 14 entries.\n",
      "scenarios = [\n",
      "    # First we consider the same case as above, only allowing for 3 good boxes.\n",
      "    ('Assuming 3 good boxes:',\n",
      "     [0.0, 0.0, 0.0, 1.0] + [0.0] * 10),\n",
      "    # Finally, we consider the distribution Bill gave in class\n",
      "    ('Assuming Bill\\'s prior distribution:',\n",
      "     [0.0, 0.0, .2, .3, .3, .1, .1] + [0.0] * 7),\n",
      "    # Actually, let's add a flat prior on 2-6 (left unnormalised;\n",
      "    # p_Hi_given_data normalises the posterior)\n",
      "    ('Assuming flat prior:',\n",
      "     [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0] + [0.0] * 7),\n",
      "]\n",
      "\n",
      "# print() is always called with one pre-formatted string so the cell runs on\n",
      "# both Python 2 and Python 3. Lists are wrapped in a 1-tuple so that '%s'\n",
      "# formats the list as a whole.\n",
      "for number, (label, priors) in enumerate(scenarios):\n",
      "    if number > 0:\n",
      "        print('')  # blank line between scenarios\n",
      "    posteriors = p_Hi_given_data(priors)\n",
      "    print(label)\n",
      "    print('Priors:     %s' % (priors,))\n",
      "    print('Posteriors: %s' % (posteriors,))\n",
      "    print('P( V | data ) =  %s' % p_next_valuable(posteriors))\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Assuming 3 good boxes:\n",
        "Priors:     [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "Posteriors: [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "P( V | data ) =  0.557919621749\n",
        "\n",
        "Assuming Bill's prior distribution:\n",
        "Priors:     [0.0, 0.0, 0.2, 0.3, 0.3, 0.1, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "Posteriors: [0.0, 0.0, 0.15625, 0.275390625, 0.31640624999999994, 0.119140625, 0.1328125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "P( V | data ) =  0.603298611111\n",
        "\n",
        "Assuming flat prior:\n",
        "Priors:     [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "Posteriors: [0.0, 0.0, 0.14814814814814814, 0.17407407407407408, 0.19999999999999998, 0.22592592592592592, 0.2518518518518518, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "P( V | data ) =  0.62962962963\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
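      "We see, happily, that assuming exactly 3 good boxes gives the same result with both methods (P( V | data ) of about 0.558). Furthermore, both distributed priors give an even higher probability that the next choice will be valuable: about 0.603 for Bill's prior and about 0.630 for the flat prior on 2-6 good boxes."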
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}