{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Introduction to Simulations #\n",
"\n",
"In this notebook we will learn:\n",
"\n",
"- Comparisons\n",
"- For-loops\n",
"- Basic structure of a simulation using for-loops\n",
"- Use of `sum` with a comparison to count successes in a simulation\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datascience import *\n",
"import numpy as np\n",
"\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plots\n",
"plots.style.use('fivethirtyeight')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Comparison ##"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"3 > 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"type(3 > 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"raises-exception"
]
},
"outputs": [],
"source": [
"# Intentional error: Python's Boolean literals are capitalized (`True`, `False`),\n",
"# so lowercase `true` is just an undefined name and raises a NameError.\n",
"true"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": [
"raises-exception"
]
},
"outputs": [],
"source": [
"# Intentional error: a single `=` means assignment, not comparison, and a number\n",
"# cannot be assigned to, so this line is a SyntaxError. Use `==` to compare.\n",
"3 = 3 "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"3 == 3.0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"10 != 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = 14\n",
"y = 3"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x > 15"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"12 < x"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x < 20"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"12 < x < 20"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"10 < x-y < 13"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x > 13 and y < 3.14159"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Comparisons with arrays"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"pets == 'cat'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"1 + 1 + 0 + 1 + 0 + 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum(make_array(True, True, False, True, False, False))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum(pets == 'dog')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.count_nonzero(pets == 'dog')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = np.arange(20, 31)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"x > 28"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## For-loops ##\n",
"\n",
"Python has a `for` statement. The structure is like this:\n",
"\n",
"```\n",
"for variable in list or array:\n",
"    body of loop\n",
"```\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rainbow = make_array('red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet')\n",
"\n",
"for color in rainbow:\n",
" print(color)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for thing in rainbow:\n",
" print(thing)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"num_array = np.arange(1, 3.25, 0.25)\n",
"\n",
"## This for-loop is meaningless, don't try to figure out what's being computed\n",
"## we just want to demonstrate that a for-loop can involve multiple steps\n",
"\n",
"for i in num_array:\n",
" i2 = i**2\n",
" i3 = i2 - 1\n",
" i4 = i3*(1.09)\n",
" print(i4)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for k in np.arange(11):\n",
" print(k**3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"num_list = [1, 2, 3, 4, 5, 6, 7, 8]\n",
"\n",
"for k in num_list:\n",
" print((k - 1)**0.5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Appending Arrays\n",
"\n",
"We'll see that appending to an array can be a good way to keep track of the results of multiple simulations. "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"first = np.arange(4)\n",
"second = np.arange(10, 17)\n",
"second"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.append(first, 6)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"first"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.append(first, second)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"first"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"second"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"squares = make_array() # an empty array\n",
"\n",
"num_array = np.arange(11)\n",
"\n",
"for i in num_array:\n",
" squares = np.append(squares, i**2)\n",
" \n",
"squares"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Simulation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's play a game: we each roll a die. \n",
"\n",
"If my number is bigger: you pay me a dollar.\n",
"\n",
"If they're the same: we do nothing.\n",
"\n",
"If your number is bigger: I pay you a dollar."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Steps:\n",
"1. Find a way to simulate the roll of a die, then generalize to two dice.\n",
"2. Compute how much money we win/lose based on the result.\n",
"3. Do steps 1 and 2 10,000 times."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Random Selection\n",
"\n",
"The `np.random.choice` function can help here."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"die_faces = np.arange(1, 7)\n",
"die_faces"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.random.choice(die_faces)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.random.choice(die_faces, 10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Conditional Statements"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Work in progress\n",
"def one_round(my_roll, your_roll):\n",
"    \"\"\"Return 1 when my roll beats yours.\n",
"\n",
"    Unfinished draft: a tie or a loss reaches no `return`, so the call\n",
"    evaluates to None in those cases.\n",
"    \"\"\"\n",
"    i_won = my_roll > your_roll\n",
"    if i_won:\n",
"        return 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"one_round(4, 3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"one_round(2, 6)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Final correct version\n",
"def one_round(my_roll, your_roll):\n",
"    \"\"\"Return my winnings, in dollars, for one round of the dice game.\n",
"\n",
"    The result is 1 if my roll is bigger, -1 if your roll is bigger,\n",
"    and 0 if the two rolls are the same.\n",
"    \"\"\"\n",
"    if your_roll == my_roll:\n",
"        return 0\n",
"    if your_roll > my_roll:\n",
"        return -1\n",
"    if my_roll > your_roll:\n",
"        return 1"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"one_round(1, 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"one_round(6, 5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"one_round(7, -1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def simulate_one_round():\n",
"    \"\"\"Roll one die for each player and return my winnings for that round.\"\"\"\n",
"    # Arguments are evaluated left to right, so my die is still rolled first.\n",
"    return one_round(np.random.choice(die_faces), np.random.choice(die_faces))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"simulate_one_round()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Repeated Betting ###"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results = make_array()\n",
"results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results = np.append(results, simulate_one_round())\n",
"results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"game_outcomes = make_array()\n",
"\n",
"for i in np.arange(5):\n",
" game_outcomes = np.append(game_outcomes, simulate_one_round())\n",
" \n",
"game_outcomes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"game_outcomes = make_array()\n",
"\n",
"for i in np.arange(10000):\n",
" game_outcomes = np.append(game_outcomes, simulate_one_round())\n",
" \n",
"game_outcomes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"len(game_outcomes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results = Table().with_column('My winnings', game_outcomes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"results.group('My winnings').barh('My winnings')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"game_outcomes = make_array()\n",
"\n",
"for i in np.arange(10000):\n",
" game_outcomes = np.append(game_outcomes, simulate_one_round())\n",
" \n",
"results = Table().with_column('My winnings', game_outcomes)\n",
"\n",
"results.group('My winnings').barh('My winnings')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Would this game be a good way to make money? ##"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum(results.column(0))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Bonus question: This simulation is relatively simple. \n",
"# Can you find a way to run it without using a for loop?\n",
"\n",
"# Roll both dice for every round in one call each instead of looping.\n",
"num_rounds = 10000\n",
"my_rolls = np.random.choice(np.arange(1, 7), size=num_rounds)\n",
"your_rolls = np.random.choice(np.arange(1, 7), size=num_rounds)\n",
"\n",
"rolls = Table().with_columns(\"Mine\", my_rolls, \"Yours\", your_rolls)\n",
"\n",
"# `apply` calls one_round once per row, so no explicit loop is needed.\n",
"results = rolls.with_column(\"Results\", rolls.apply(one_round, \"Mine\", \"Yours\"))\n",
"\n",
"# Only the last expression of a cell is displayed, so the chart is the cell's output.\n",
"results.group(\"Results\").barh(\"Results\")"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"### Another example: simulating heads in 100 coin tosses\n",
"\n",
"If 100 people individually flipped their own fair coin at the same time (or one very bored person flipped a fair coin 100 times), would it be reasonable if 40 or fewer of them came up heads?\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"coin = make_array('heads', 'tails')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum(np.random.choice(coin, 100) == 'heads')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Simulate one outcome\n",
"\n",
"def num_heads():\n",
"    \"\"\"Return how many of 100 random draws from `coin` are 'heads'.\"\"\"\n",
"    tosses = np.random.choice(coin, 100)\n",
"    is_heads = tosses == 'heads'\n",
"    # Each True counts as 1, so the sum is the number of heads.\n",
"    return sum(is_heads)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Decide how many times you want to repeat the experiment\n",
"\n",
"repetitions = 10000"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Simulate that many outcomes\n",
"\n",
"outcomes = make_array()\n",
"\n",
"for i in np.arange(repetitions):\n",
" outcomes = np.append(outcomes, num_heads())\n",
" \n",
"heads = Table().with_column('Heads', outcomes)\n",
"heads.hist(bins = np.arange(29.5, 70.6), right_end = 40)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"heads = Table().with_column('Heads', outcomes)\n",
"heads.hist(bins = np.arange(29.5, 70.6), right_end = 40)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The yellow section: how many outcomes is that?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum(heads.column(0)<=40)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum(outcomes <=40)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then what proportion is that?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Compute the proportion from this run's simulated outcomes instead of\n",
"# hard-coding a count copied from an earlier (random) run.\n",
"sum(outcomes <= 40) / len(outcomes)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"What interval captures the middle 95% of these outcomes?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The 2.5th and 97.5th percentiles bound the middle 95% of the simulated outcomes.\n",
"np.percentile(outcomes, make_array(2.5, 97.5))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Famous Monty Hall Problem ##\n",
"\n",
"On the game show, Let's Make a Deal, one of the more popular games was a simple guessing game involving three doors. One door would hide a desirable prize (an expensive vacation, a new car, or something of similar value). The other two doors would hide a fake prize, often a goat. The way the game was played was simple:\n",
"\n",
"1. The player picks a door\n",
"2. Monty Hall (the show's host) would ask that *a different* door be opened, revealing one of the two goats.\n",
"3. Monty would offer the player the opportunity to switch to the *other* unopened door. \n",
"\n",
"The mathematical/probability/statistical question is this: should the player switch doors?\n",
"\n",
"To put it another way, which player strategy has the higher likelihood of winning, picking a door and sticking with it, or picking a door and automatically switching once another door has been opened?\n",
"\n",
"**Strategy 1:** The pick & stick (pick a door and don't switch when given the chance)\n",
"\n",
"**Strategy 2:** The pick & switch (pick a door, but automatically switch to the other when it's offered)\n",
"\n",
"Let's use simulations to decide which strategy is better.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"doors = make_array('car', 'first goat', 'second goat')\n",
"\n",
"goats = make_array('first goat', 'second goat')\n",
"\n",
"def other_goat(a_goat):\n",
"    \"\"\"Return the name of the goat that is not `a_goat`.\n",
"\n",
"    Returns None when `a_goat` is neither 'first goat' nor 'second goat'.\n",
"    \"\"\"\n",
"    pairs = (('first goat', 'second goat'), ('second goat', 'first goat'))\n",
"    for goat, partner in pairs:\n",
"        if a_goat == goat:\n",
"            return partner\n",
"    return None\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def monty_hall():\n",
"    \"\"\"Simulate one play of the Monty Hall game.\n",
"\n",
"    Returns the list [contestant_choice, monty_choice, remaining_door]:\n",
"    what is behind the contestant's door, what Monty's door shows, and\n",
"    what is behind the one door left over.\n",
"    \"\"\"\n",
"    contestant_choice = np.random.choice(doors)\n",
"\n",
"    if contestant_choice in ('first goat', 'second goat'):\n",
"        # The contestant holds a goat, so Monty's door is the other goat\n",
"        # and the car is behind the remaining door.\n",
"        monty_choice = other_goat(contestant_choice)\n",
"        remaining_door = 'car'\n",
"    elif contestant_choice == 'car':\n",
"        # Monty's door is a randomly chosen goat; the other goat remains.\n",
"        monty_choice = np.random.choice(goats)\n",
"        remaining_door = other_goat(monty_choice)\n",
"\n",
"    return [contestant_choice, monty_choice, remaining_door]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"games = Table(['Strategy 1 Prize', 'Revealed', 'Strategy 2 Prize'])\n",
"\n",
"reps = 10000\n",
"\n",
"for i in range(reps):\n",
" games.append(monty_hall())\n",
" \n",
"games"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum(games.column('Strategy 1 Prize')=='car')/reps"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sum(games.column('Strategy 2 Prize')=='car')/reps"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 1
}