{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Introduction to Simulations #\n",
    "\n",
    "In this notebook we will learn:\n",
    "\n",
    "- Comparisons\n",
    "- For-loops\n",
    "- Basic structure of a simulation by for-loops\n",
    "- Use of sum with comparison to count successes in a simulation\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datascience import *\n",
    "import numpy as np\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plots\n",
    "plots.style.use('fivethirtyeight')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Comparison ##"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "3 > 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "type(3 > 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "true"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "3 = 3 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "3 == 3.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "10 != 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = 14\n",
    "y = 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x > 15"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "12 < x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x < 20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "12 < x < 20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "10 < x-y < 13"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x > 13 and y < 3.14159"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Comparisons with arrays"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets = make_array('cat', 'cat', 'dog', 'cat', 'dog', 'rabbit')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pets == 'cat'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "1 + 1 + 0 + 1 + 0 + 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(make_array(True, True, False, True, False, False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(pets == 'dog')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.count_nonzero(pets == 'dog')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.arange(20, 31)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "x > 28"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## For-loops##\n",
    "\n",
    "Python has a `for`.  The stucture is like this:\n",
    "\n",
    "<span style=\"color: green\"><span style=\"font-weight: bold\">for</span></span> <u>variable</u> <span style=\"color: green\"><span style=\"font-weight: bold\">in </span></span> <u>list or array</u>:\n",
    "    \n",
    "    body of loop\n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rainbow = make_array('red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet')\n",
    "\n",
    "for color in rainbow:\n",
    "    print(color)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for thing in rainbow:\n",
    "    print(thing)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_array = np.arange(1, 3.25, 0.25)\n",
    "\n",
    "## This for-loop is meaningless, don't try to figure out what's being computed\n",
    "## we just want to demonstrate that a for-loop can involve multiple steps\n",
    "\n",
    "for i in num_array:\n",
    "    i2 = i**2\n",
    "    i3 = i2 - 1\n",
    "    i4 = i3*(1.09)\n",
    "    print(i4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for k in np.arange(11):\n",
    "    print(k**3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_list = [1, 2, 3, 4, 5, 6, 7, 8]\n",
    "\n",
    "for k in num_list:\n",
    "    print((k - 1)**0.5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Appending Arrays\n",
    "\n",
    "We'll see that appending an array can be a good way to keep track to the results of multiple simulations.  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "first = np.arange(4)\n",
    "second = np.arange(10, 17)\n",
    "second"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.append(first, 6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "first"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.append(first, second)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "first"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "second"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "squares = make_array() # an empty array\n",
    "\n",
    "num_array = np.arange(11)\n",
    "\n",
    "for i in num_array:\n",
    "    squares = np.append(squares, i**2)\n",
    "    \n",
    "squares"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Simulation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's play a game: we each roll a die. \n",
    "\n",
    "If my number is bigger: you pay me a dollar.\n",
    "\n",
    "If they're the same: we do nothing.\n",
    "\n",
    "If your number is bigger: I pay you a dollar."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Steps:\n",
    "1. Find a way to simulate the roll of a die, then generalize to two dice.\n",
    "2. Compute how much money we win/lose based on the result.\n",
    "3. Do steps 1 and 2 10,000 times."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Random Selection\n",
    "\n",
    "The `np.random.choice` function can help here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "die_faces = np.arange(1, 7)\n",
    "die_faces"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.choice(die_faces)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.random.choice(die_faces, 10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Conditional Statements"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work in progress\n",
    "def one_round(my_roll, your_roll):\n",
    "    if my_roll > your_roll:\n",
    "        return 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "one_round(4, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "one_round(2, 6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Final correct version\n",
    "def one_round(my_roll, your_roll):\n",
    "    if my_roll > your_roll:\n",
    "        return 1\n",
    "    elif your_roll > my_roll:\n",
    "        return -1\n",
    "    elif your_roll == my_roll:\n",
    "        return 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "one_round(1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "one_round(6, 5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "one_round(7, -1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def simulate_one_round():\n",
    "    my_roll = np.random.choice(die_faces)\n",
    "    your_roll = np.random.choice(die_faces)\n",
    "    return one_round(my_roll, your_roll)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "simulate_one_round()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Repeated Betting ###"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = make_array()\n",
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = np.append(results, simulate_one_round())\n",
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "game_outcomes = make_array()\n",
    "\n",
    "for i in np.arange(5):\n",
    "    game_outcomes = np.append(game_outcomes, simulate_one_round())\n",
    "    \n",
    "game_outcomes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "game_outcomes = make_array()\n",
    "\n",
    "for i in np.arange(10000):\n",
    "    game_outcomes = np.append(game_outcomes, simulate_one_round())\n",
    "    \n",
    "game_outcomes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(game_outcomes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = Table().with_column('My winnings', game_outcomes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results.group('My winnings').barh('My winnings')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "game_outcomes = make_array()\n",
    "\n",
    "for i in np.arange(10000):\n",
    "    game_outcomes = np.append(game_outcomes, simulate_one_round())\n",
    "    \n",
    "results = Table().with_column('My winnings', game_outcomes)\n",
    "\n",
    "results.group('My winnings').barh('My winnings')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Would this game be a good way to make money? ##"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(results.column(0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Bonus question: This simulation is relatively simple. \n",
    "# Can you find a way to run it without using a for loop?\n",
    "\n",
    "my_rolls   = np.random.choice(np.arange(1,7), size = 10000)\n",
    "your_rolls = np.random.choice(np.arange(1,7), size = 10000)\n",
    "\n",
    "results = Table().with_columns(\"Mine\", my_rolls, \"Yours\", your_rolls)\n",
    "\n",
    "results = results.with_column(\"Results\", results.apply(one_round, \"Mine\", \"Yours\"))\n",
    "\n",
    "results.group(\"Results\")\n",
    "\n",
    "results.group(\"Results\").barh(\"Results\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### Another example: simulating heads in 100 coin tosses\n",
    "\n",
    "If 100 people individually flipped their own fair coin at the same time (or one very bored person flipped a fair coin 100 times), would it be reasonable if 40 or fewer of them came up heads?\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "coin = make_array('heads', 'tails')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(np.random.choice(coin, 100) == 'heads')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate one outcome\n",
    "\n",
    "def num_heads():\n",
    "    return sum(np.random.choice(coin, 100) == 'heads')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Decide how many times you want to repeat the experiment\n",
    "\n",
    "repetitions = 10000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate that many outcomes\n",
    "\n",
    "outcomes = make_array()\n",
    "\n",
    "for i in np.arange(repetitions):\n",
    "    outcomes = np.append(outcomes, num_heads())\n",
    "    \n",
    "heads = Table().with_column('Heads', outcomes)\n",
    "heads.hist(bins = np.arange(29.5, 70.6), right_end = 40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "heads = Table().with_column('Heads', outcomes)\n",
    "heads.hist(bins = np.arange(29.5, 70.6), right_end = 40)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "They yellow section; how many is that?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(heads.column(0)<=40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(outcomes <=40)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then what proportion is that?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "290/10000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "What interval captures the middle 95% of these outcomes?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    " np.percentile(outcomes, make_array(2.5, 97.5))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Famous Monty Hall Problem ##\n",
    "\n",
    "On the game show, Let's Make a Deal, one of the more popular games was a simple guessing game involving three doors.  One door would hide a desireable prize (an expensive vacation, a new car, or something of similar value).  The other two doors would hide a fake prize, often a goat.  The way the game was played was simple:\n",
    "\n",
    "1. The player picks a door\n",
    "2. Monty Hall (the show's host) would ask that *a different* door be opened, revealing one of the two goats.\n",
    "3. Monty would offer the player the opportunity to switch to the *other* unopened door.  \n",
    "\n",
    "The mathematical/probability/statistical question is this: should the player switch doors?\n",
    "\n",
    "To put it another way, which player strategy has the higher likelihood of winning, picking a door and sticking with it, or picking a door and automatically switching once another door has been opened?\n",
    "\n",
    "**Strategy 1:** The pick & stick (pick a door and don't switch when given the change)\n",
    "\n",
    "**Strategy 2:** The pick & switch (pick a door, but automatically switch to the other when it's offered)\n",
    "\n",
    "Let's use simulations to decide which strategy is better.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "doors = make_array('car', 'first goat', 'second goat')\n",
    "\n",
    "goats = make_array('first goat', 'second goat')\n",
    "\n",
    "def other_goat(a_goat):\n",
    "    if a_goat == 'first goat':\n",
    "        return 'second goat'\n",
    "    elif a_goat == 'second goat':\n",
    "        return 'first goat'\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def monty_hall():\n",
    "    \n",
    "    contestant_choice = np.random.choice(doors)\n",
    "    \n",
    "    if contestant_choice == 'first goat':\n",
    "        monty_choice = 'second goat'\n",
    "        remaining_door = 'car'\n",
    "        \n",
    "    elif contestant_choice == 'second goat':\n",
    "        monty_choice = 'first goat'\n",
    "        remaining_door = 'car'\n",
    "        \n",
    "    elif contestant_choice == 'car':\n",
    "        monty_choice = np.random.choice(goats)\n",
    "        remaining_door = other_goat(monty_choice)\n",
    "        \n",
    "    return [contestant_choice, monty_choice, remaining_door]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "games = Table(['Strategy 1 Prize', 'Revealed', 'Strategy 2 Prize'])\n",
    "\n",
    "reps = 10000\n",
    "\n",
    "for i in range(reps):\n",
    "    games.append(monty_hall())\n",
    "    \n",
    "games"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(games.column('Strategy 1 Prize')=='car')/reps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sum(games.column('Strategy 2 Prize')=='car')/reps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}