{
 "metadata": {
  "name": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Here we proceed with the assumption that the number of good boxes is 3\n",
      "\n",
      "# Assume as a prior that there are 3 of 13 good boxes\n",
      "priorG = 3.0/13.0\n",
      "priorB = 1.0-priorG\n",
      "print 'P( HG ) = ',priorG,' '*10,\n",
      "print 'P( HB ) = ',priorB\n",
      "\n",
      "# Calculate posteriors that box in hand is good box:\n",
      "# scale each prior by its factor for the observed data, then normalize\n",
      "piG = 9.0/10.0 * priorG\n",
      "piB = 2.0/10.0 * priorB\n",
      "evidence = piG + piB\n",
      "posteriorG = piG/evidence\n",
      "posteriorB = piB/evidence\n",
      "print 'P( HG | data ) = ',posteriorG,' '*3,\n",
      "print 'P( HB | data ) = ',posteriorB\n",
      "print\n",
      "\n",
      "# Calculate probability next item is valuable\n",
      "print 'P( V | data ) = ',(8.0/9.0 * posteriorG + 1.0/9.0 * posteriorB)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "P( HG ) = 0.230769230769 P( HB ) = 0.769230769231\n",
        "P( HG | data ) = 0.574468085106 P( HB | data ) = 0.425531914894\n",
        "\n",
        "P( V | data ) = 0.557919621749\n"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Here we allow for a distribution of priors for the number of good boxes.\n",
      "# Throughout, index i of a priors/posteriors list stands for the hypothesis\n",
      "# Hi: i of the 13 boxes are good (so a full list has 14 entries, i = 0..13).\n",
      "\n",
      "def _joint_weights(i):\n",
      "    \"\"\"Return (pi_HG, pi_HB) under Hi.\n",
      "\n",
      "    These are the unnormalized weights that the box in hand is good / bad:\n",
      "    the same 9/10 and 2/10 factors used in the cell above, scaled by\n",
      "    i/13 and (13-i)/13 respectively.\n",
      "    \"\"\"\n",
      "    pi_HG = 9.0/10.0*float(i)/13.0\n",
      "    pi_HB = 2.0/10.0*(13.0-float(i))/13.0\n",
      "    return pi_HG, pi_HB\n",
      "\n",
      "def p_Hi_given_data(priors):\n",
      "    \"\"\"Return the list of posteriors P( Hi | data ), one per entry of priors.\n",
      "\n",
      "    priors[i] is the prior weight of Hi; it does not have to be normalized\n",
      "    because the result is divided by its own total.\n",
      "    Raises ZeroDivisionError if every weighted prior is zero.\n",
      "    \"\"\"\n",
      "    pi = []\n",
      "    for i in range(len(priors)):\n",
      "        pi_HG, pi_HB = _joint_weights(i)\n",
      "        pi.append((pi_HG + pi_HB)*priors[i])\n",
      "    total = sum(pi)  # computed once rather than once per element\n",
      "    return [p/total for p in pi]\n",
      "\n",
      "def p_v_given_Hi_data(i):\n",
      "    \"\"\"Return P( V | Hi, data ), the chance the next item is valuable under Hi.\"\"\"\n",
      "    pi_HG_given_Hi_data, pi_HB_given_Hi_data = _joint_weights(i)\n",
      "    norm = pi_HG_given_Hi_data + pi_HB_given_Hi_data\n",
      "    p_HG_given_Hi_data = pi_HG_given_Hi_data/norm\n",
      "    p_HB_given_Hi_data = pi_HB_given_Hi_data/norm\n",
      "\n",
      "    return 8.0/9.0*p_HG_given_Hi_data + 1.0/9.0*p_HB_given_Hi_data\n",
      "\n",
      "def p_next_valuable(posteriors):\n",
      "    \"\"\"Return P( V | data ): P( V | Hi, data ) averaged over posteriors.\"\"\"\n",
      "    return sum(p_v_given_Hi_data(i)*p for i, p in enumerate(posteriors))\n",
      "\n",
      "def report(title, priors):\n",
      "    \"\"\"Print priors, posteriors and P( V | data ) for one prior distribution.\"\"\"\n",
      "    posteriors = p_Hi_given_data(priors)\n",
      "    print title\n",
      "    print 'Priors: ',priors\n",
      "    print 'Posteriors:',posteriors\n",
      "    print 'P( V | data ) = ',p_next_valuable(posteriors)\n",
      "\n",
      "# First we consider the same case as above, only allowing for 3 good boxes.\n",
      "# (14 entries, i = 0..13, so this vector matches the two below.)\n",
      "report('Assuming 3 good boxes:', [0.0, 0.0, 0.0, 1.0] + [0.0] * 10)\n",
      "print\n",
      "\n",
      "# Finally, we consider the distribution Bill gave in class\n",
      "report('Assuming Bill\\'s prior distribution:', [0.0, 0.0, .2, .3, .3, .1, .1] + [0.0] * 7)\n",
      "print\n",
      "\n",
      "# Actually, let's add a flat prior on 2-6\n",
      "report('Assuming flat prior:', [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0] + [0.0] * 7)\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Assuming 3 good boxes:\n",
        "Priors: [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "Posteriors: [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "P( V | data ) = 0.557919621749\n",
        "\n",
        "Assuming Bill's prior distribution:\n",
        "Priors: [0.0, 0.0, 0.2, 0.3, 0.3, 0.1, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "Posteriors: [0.0, 0.0, 0.15625, 0.275390625, 0.31640624999999994, 0.119140625, 0.1328125, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "P( V | data ) = 0.603298611111\n",
        "\n",
        "Assuming flat prior:\n",
        "Priors: [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "Posteriors: [0.0, 0.0, 0.14814814814814814, 0.17407407407407408, 0.19999999999999998, 0.22592592592592592, 0.2518518518518518, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]\n",
        "P( V | data ) = 0.62962962963\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "We see, happily, that assuming exactly 3 good boxes gives the same result with both methods. Furthermore, both of the broader priors (Bill's distribution and the flat prior on 2-6 good boxes) give an even higher probability that the next item will be valuable."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}