{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from datascience import *\n",
"import numpy as np\n",
"\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plots\n",
"plots.style.use('fivethirtyeight')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Comparing Two Samples"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"births = Table.read_table('baby.csv')\n",
"#The table contains \n",
"\n",
"#the baby's birth weight in ounces\n",
"# the number of gestational days, \n",
"#the mother's age in completed years#\n",
"#the mother's height in inches,\n",
"#pregnancy weight in pounds\n",
"# and whether or not the mother smoked during pregnancy.\n",
"\n",
"\n",
"#The question we are looking at is the following:\n",
"#Does smoking have a effect on birth weight? \n",
"\n",
"#We will first compute the difference in average wights \n",
"#between the smokers and the nonsmokers\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-9.265999999999991"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# lets first get just the birthweigh and the smoker info\n",
"smoking_babies=births.select(\"Birth Weight\",\"Maternal Smoker\")\n",
"#lets save this table to a variable and use group to find \n",
"# how large each group is \n",
"smoking_babies.group('Maternal Smoker')\n",
"# we have more maternal smokers \n",
"\n",
"#Lets now find the average weight of each group\n",
"\n",
"smoking_babies.group('Maternal Smoker', np.average)\n",
"average_babies=smoking_babies.group('Maternal Smoker', np.average)\n",
"\n",
"#The observed statistic we will be lookin at is given by\n",
"# the difference of the smokers by the nonsmokers\n",
"\n",
"observed_statistic = 113.819-123.085\n",
"observed_statistic\n",
"\n",
"\n",
"#Question: What values of our statistic are in \n",
"#favor of the alternative: positive or negative?\n",
"\n",
"#the statistic is a negative value since a negatice number\n",
"#means the non smoker babies are heavier on average in this data so far"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"smoking_and_birthweight = births.select('Maternal Smoker', 'Birth Weight')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"smoking_and_birthweight.hist('Birth Weight', group='Maternal Smoker')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test Statistic\n",
"\n",
"[Question] What values of our statistic are in favor of the alternative: positive or negative?"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-9.266142572024918"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"average_babies\n",
"\n",
"#notice we hard coded the observed statistic\n",
"#we want to repeat this logic so we will rewrite it as \n",
"# a function\n",
"\n",
"#lets walk thourhg anoother way of computing the \n",
"#test statistic\n",
"\n",
"averageshold=average_babies.column(\"Birth Weight average\")\n",
"#now we wnat the difference of smokers - nonsmokers \n",
"observed_stat=averageshold.item(1)-averageshold.item(0)\n",
"\n",
"\n",
"#Select birth weight and smoking maternal\n",
"#Find averages of groups - needed a table\n",
"#Got column of averages - used .column\n",
"#found difference - \n",
"\n",
"def difference_of_averages(inputT):\n",
" BM=inputT.select(\"Birth Weight\",\"Maternal Smoker\")\n",
" average_table=BM.group(\"Maternal Smoker\",np.average)\n",
" means = average_table.column(\"Birth Weight average\")\n",
" return means.item(1)-means.item(0)\n",
"\n",
"#Computes differences of average birth weight between smokers\n",
"#and nonsmokers\n",
"difference_of_averages(births)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'means_table' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmeans\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmeans_table\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mobserved_difference\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmeans\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m-\u001b[0m \u001b[0mmeans\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mobserved_difference\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'means_table' is not defined"
]
}
],
"source": [
"means = means_table.column(1)\n",
"observed_difference = means.item(1) - means.item(0)\n",
"observed_difference"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def difference_of_means(table, label, group_label):\n",
" \"\"\"Takes: name of table, column label of numerical variable,\n",
" column label of group-label variable\n",
" Returns: Difference of means of the two groups\"\"\"\n",
" \n",
" #table with the two relevant columns\n",
" reduced = table.select(label, group_label) \n",
" \n",
" # table containing group means\n",
" means_table = reduced.group(group_label, np.average)\n",
" # array of group means\n",
" means = means_table.column(1)\n",
" \n",
" return means.item(1) - means.item(0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"difference_of_means(births, 'Birth Weight', 'Maternal Smoker')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Random Permutation (Shuffling)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"#We want to see then if the observed statistic due to chance?\n",
"#i. is the p-value of -9.26 small or large? \n",
"\n",
"#to do so we assume that smoking has no real effect!\n",
"\n",
"#To set up our experiment we will use permutation\n",
"\n",
"#Again if smoking vs nonsmoking does not effect weight then\n",
"#we would expect the average birth weights to be close\n",
"\n",
"#avergesmokerwight - avergenonsmokerwight about 0\n",
"\n",
"\n",
"#lets reiew sample\n",
"\n",
"letters = Table().with_column('Letter', make_array('a', 'b', 'c', 'd', 'e'))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
" \n",
"
\n",
"
Letter
Permutated
\n",
"
\n",
" \n",
" \n",
"
\n",
"
a
d
\n",
"
\n",
"
\n",
"
b
b
\n",
"
\n",
"
\n",
"
c
a
\n",
"
\n",
"
\n",
"
d
e
\n",
"
\n",
"
\n",
"
e
c
\n",
"
\n",
" \n",
"
"
],
"text/plain": [
"Letter | Permutated\n",
"a | d\n",
"b | b\n",
"c | a\n",
"d | e\n",
"e | c"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# sample randomly chooses with replacement\n",
"letters.sample()\n",
"#if there is no sample size it just shuffules the entire sample\n",
"\n",
"#there is an optional argument where we can sample without replacement\n",
"letters.sample(with_replacement = False) #permutes all elements\n",
"#this makes sure we do not draw the same item twice\n",
"\n",
"#so if we would like to 'permuate an attribute' we can use sample without replacement\n",
"\n",
"permutated_letters=letters.sample(with_replacement = False).column(0)\n",
"letters.with_column('Permutated',permutated_letters)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
"
],
"text/plain": [
"Letter | Shuffled\n",
"a | d\n",
"b | c\n",
"c | e\n",
"d | a\n",
"e | b"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"letters.with_column('Shuffled', letters.sample(with_replacement = False).column(0))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Simulation Under Null Hypothesis"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"smoking_babies\n",
"#Shuffle all group labels\n",
"#Assign each shuffled label to a birth weight\n",
"#Find the difference between the averages of the two shuffled groups\n",
"#Repeat\n",
"permutated_smoking_attribute=smoking_babies.select(\"Maternal Smoker\").sample(with_replacement =False).column(0)\n",
"\n",
"smoking_babies.with_columns('Permutated Maternal Smoker',permutated_smoking_attribute)\n",
"#lets make this table to use our janky funciton\n",
"permutated=smoking_babies.select(\"Birth Weight\").with_columns('Maternal Smoker',permutated_smoking_attribute)\n",
"#still permutated labels\n",
"\n",
"#use our jank function\n",
"difference_of_averages(permutated)\n",
"# Very close to zero in comparions to -9\n",
"\n",
"#Now we will run simulation\n",
"\n",
"#Make function to simulates the expierment\n",
"def permutate_and_average():\n",
" #permutate thelables and save\n",
" perm=smoking_babies.select(\"Maternal Smoker\").sample(with_replacement =False).column(0)\n",
" #make table with weights and permutated labels then save\n",
" permutated_table=smoking_babies.select(\"Birth Weight\").with_columns('Maternal Smoker',perm)\n",
" #place our table into our function to compute average and return result\n",
" return difference_of_averages(permutated_table)\n",
"\n",
"permutate_and_average()\n",
"#use for loop to rerurn 1000 and record results\n",
"\n",
"#make array to save data\n",
"hold = make_array()\n",
"\n",
"for i in np.arange(1000):\n",
" new_average=permutate_and_average()\n",
" hold = np.append(hold,new_average)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"averages=Table().with_column('Averages',hold)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbEAAAEeCAYAAAAEmiuKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3XlcVPXeB/DPEWQRtFEZxw1wYRQxFQOXRx+VXPAqgntq9LqVa3hNLTdI0+RaYKKpheTVLC03BOwRr2maW6loVzN3G68BmjQsMuAM2xXm+cPHeZqA4SAzzBz4vF8vXq/m/H7z+305Tn48y/yOoNFo9CAiIpKgBtYugIiI6FkxxIiISLIYYkREJFkMMSIikiyGGBERSRZDjIiIJIshRkREkmW1ENuyZQv69esHd3d3uLu7Y9iwYThy5IihPSwsDDKZzOhn6NCh1iqXiIhskL21Jm7dujVWrlyJjh07oqysDLt370ZoaChOnjyJ559/HgAQEBCAzZs3G97j4OBgrXKJiMgGWS3EgoKCjF6/++67+Oyzz/Djjz8aQszR0REKhcIa5RERkQTYxDWx0tJSJCYmQqfToXfv3obt586dg5eXF/z8/DB37lxkZWVZvBaVSmXxOeor7lvL4v61HO5by6rJ/hWsuXbi9evXERgYiKKiIri4uGDLli0YPnw4ACAxMRHOzs7w9PREeno6Vq1ahbKyMpw8eRKOjo6VjskPGxFR3aFUKk22WzXESkpKcP/+feTl5eHAgQPYvn07Dh48CB8fn3J9MzIy0K1bN2zbtg0hISEWq0mlUlW50+jZcN9aFvev5XDfWlZN9q/VrokBT27U6NChAwCgZ8+euHTpEjZt2oRPPvmkXN9WrVqhdevWuHv3bm2XSURENsomrok9VVZWhpKSkgrbcnJykJGRwRs9iIjIwGpHYu+99x4CAwPRpk0baLVaJCQk4IcffkB8fDy0Wi2io6MREhIChUKB9PR0REZGQi6XY9SoUdYqmYiIbIzVQkytVmPmzJnIzMxEkyZN0LVrVyQkJGDIkCEoLCzEjRs3sGfPHuTl5UGhUGDAgAH4/PPP0bhxY2uVTERENsZqIRYXF1dpm7OzM5KSkmqxGiIikiKbuiZGRERUHVa9O5GIAHVWLrJz8032cWvaBAp501qqiEg6GGJEVpadm4/IjTtN9lk+N5QhRlQBnk4kIiLJYogREZFkMcSIiEiyGGJERCRZDDEiIpIshhgREUkWQ4yIiCSLIUZERJLFECMiIsliiBERkWQxxIiISLK4diKRBAgCcP2XNJN97AX+70z1Dz/1RBKQ96gA67ftN9nn7Wmja6kaItvB04lERCRZPBIjqiOcHB2rPOXI55JRXcMQI6oj8nWF+GR7vMk+fC4Z1TU8nUhERJLFECMiIsliiBERkWRZLcS2bNmCfv36wd3dHe7u7hg2bBiOHDliaNfr9YiKioK3tzdatmyJoKAg3Lx501rlEhGRDbJaiLVu3RorV67EqVOncOLECQwcOBChoaG4du0aAGDDhg2IjY3F6tWrcfz4ccjlcowdOxaPHj2yVslERGRjrBZiQUFBGDZsGDp06AAvLy+8++67cHV1xY8//gi9Xo+4uDjMnz8fo0ePho+PD+Li4qDVapGQkGCtkomIyMbYxDWx0tJSJCYmQqfToXfv3khLS4NarcbgwYMNfZydndGvXz+cP3/eipUSEZEtser3xK5fv47AwEAUFRXBxcUFX331Fbp27WoIKrlcbtRfLpcjIyPD5JgqlarGdZljDKoY9215Wl0JdDqdyT6PSx9X2QdAlX20Oi3/DJ4R95tlVbZ/lUqlyfdZNcSUSiW+//575OXl4cCBAwgLC8PBgwcN7YIgGPXX6/XltlU0Zk2oVKoaj0EV476t2PVf0uDi4mKyj72dfZV9AFTZx9XFFUqlZ7XqI352La0m+9eqIebg4IAOHToAAHr27IlLly5h06ZNWLhwIQAgMzMTbdu2NfTPzs4ud3RGRET1l01cE3uqrKwMJSUl8PT0hEKhwIkTJwxtRUVFOHfuHPr06WPFComIyJZY7UjsvffeQ2BgINq0aWO46/CHH35AfHw8BEFAWFgY1q5dC6VSCS8vL8TExMDFxQUTJkywVslERGRjrBZiarUaM2fORGZmJpo0aYKuXbsiISEBQ4YMAQDMmzcPhYWFWLRoETQaDfz8/JCUlITGjRtbq2QiIrIxVguxuLg4k+2CICAiIgIRERG1VBEREUmNTV0TIyIiqg6GGBERSRYfiklUjwgCTD79mU9+JqlhiBHVI3mPCrB+2/5K2/nkZ5Iank4kIiLJYogREZFkMcSIiEiyRIfYmTNnkJ2dXWl7Tk4Ozpw5Y5aiiIiIxBAdYsHBwUZrGf7ZqVOnEBwcbJaiiIiIxBAdYnq93mR7SUkJGjTg2UkiIqo9Jm+xz8/PR15enuH1w4cPce/evXL9NBoNEhMT0apVK/NXSEREVAmTIbZp0yZ8+OGHAKpey1Cv1+Pdd981f4VERESVMBliAQEBcHJygl6vR2RkJMaNG4du3boZ9REEAY0aNULPnj3h7+9v0WKJiIj+yGSI9e3bF3379gUAFBcXIzg4GF27dq2VwoiIiKoietmp8PBwS9ZBRERUbZWG2O7duwEAkydPhiAIhtdVmTJlinkqIyIiqkKlITZ79mwIgoDx48fDwcEBs2fPrnIwQRAYYkREVGsqDbGff/4ZAODg4GD0moiIyFZUGmIeHh4mXxMREVkbl9ggIiLJqtZDMU+ePInt27cjNTUVubm55ZaiEgQBly9fNmuBRERElREdYnFxcVi6dCnc3Nzg7++PLl26WLIuIiKiKokOsdjYWPTv3x+JiYmGmz1qYt26dUhOTsadO3fg4OAAf39/rFixAj4+PoY+YWFh5W7t9/f3x7Fjx2o8PxERSZ/oa2I5OTkYN26cWQIMAH744QdMmzYNR44cwYEDB2Bvb48xY8YgNzfXqF9AQABu375t+Nm3b59Z5iciIukTfSTm6+uL9PR0s02clJRk9Hrz5s3w8PBASkoKRowYYdju6OgIhUJhtnmJiKjuEH0k9v7772PXrl04ffq0RQrRarUoKyuDTCYz2n7u3Dl4eXnBz88Pc+fORVZWlkXmJyIi6RF9JBYVFYUmTZpgzJgx6NixI9zd3WFnZ2fURxAExMfHP1Mh4eHh6NatG3r37m3YNnToUAQHB8PT0xPp6elYtWoVQkJCcPLkSTg6OlY4jkqleqb5zT0GVYz7tjytrgQ6nc5kn8elj6vsA6DG42h1Wv4ZVYL7xbIq279KpdLk+0SH2K1btyAIAtq2bYvi4mLcuXOnXB9BEMQOZ+Sdd95BSkoKDh8+bBSM48ePN/x3165d4evri27duuHIkSMICQmpcKyqfuGqqFSqGo9BFeO+rdj1X9Lg4uJiso+9nX2VfQDUeBxXF1colZ5VzlPf8LNrWTXZv6JD7OrVq880QVUiIiKQlJSE5ORktGvXzmTfVq1aoXXr1rh7965FaiEiImmp1pedzW3JkiVISkrCwYMH0alTpyr75+TkICMjgzd6EBERgGqE2L1790T1c3d3F9Vv4cKF2Lt3L7766ivIZDKo1WoAT06HuLq6QqvVIjo6GiEhIVAoFEhPT0dkZCTkcjlGjRoltmwiIqrDRIdY9+7dRV3zevjwoajxtm7dCgAYPXq00fYlS5YgIiICdnZ2uHHjBvbs2YO8vDwoFAoMGDAAn3/+ORo3biy2bCIiqsNEh9gnn3xSLsRKS0uRlpaGPXv2oEWLFpg+fbroiTUajcl2Z2fnct8lIyIi+iPRIRYaGlpp2/z58zF48GBotVqzFEVERCSGWR7F4urqitDQUGzatMkcwxEREYlitueJNWzYEBkZGeYajoiIqEpmCbGrV6/i008/RefOnc0xHBERkSg1vjsxLy8P+fn5cHV1RWxsrFmLIyIiMkV0iPXv379ciAmCAJlMhg4dOmD8+PHlFu8lIiKypGo92ZmIiMiWmO3GDiIiotrGECMiIsliiBERkWQxxIiISLIYYkREJFmiQqyoqAirV6/G8ePHLV0PERGRaKJCzMnJCR999BHu379v6XqIiIhEE306sVu3brh7964layEiIqoW0SG2fPly7NixA0eOHLFkPURERKKJXrFj48aNkMlkmDJlClq3bo127drB2dnZqI8gCIiPjzd7kURERBURHWK3bt2CIAho27YtACA9Pb1cn4oWCCYiIrIU0SF29epVS9ZBRDZAEIDrv6SZ7OPWtAkU8qa1VBGRaaJDjIjqvrxHBVi/bb/JPsvnhjLEyGZU68vOpaWliI+Px5w5czBp0iRcu3YNAKDRaLB//378/vvvFimSiIioIqKPxPLy8jBu3DhcunQJrq6u0Ol0mD17NgCgcePGWLp0KSZPnozly5dbrFgiqVFn5SI7N99kn+KSklqqhqjuER1iK1euxK1bt7Bv3z707NkTXl5ehjY7OzsEBwfj6NGjokNs3bp1SE5Oxp07d+Dg4AB/f3+sWLECPj4+hj56vR7R0dHYvn07NBoN/Pz8EBMTgy5dulTjVySynuzcfERu3Gmyz/ypY2upGqK6R/TpxH/+85+YOXMmhg4dWuFdiB07dsS9e/dET/zDDz9g2rRpOHLkCA4cOAB7e3uMGTMGubm5hj4bNmxAbGysYckruVyOsWPH4tGjR6LnISKiukv0kZhGo0H79u0rbdfr9SipxmmRpKQko9ebN2+Gh4cHUlJSMGLECOj1esTFxWH+/PkYPXo0gCdPl1YqlUhISMDrr78uei4iIqqbRB+JeXh44MaNG5W2nzlzxugUY3VptVqUlZVBJpMBANLS0qBWqzF48GBDH2dnZ/Tr1w/nz59/5nmIiKjuEH0kNnHiRKxfvx7BwcGGa1JPTytu3rwZBw8exAcffPDMhYSHh6Nbt27o3bs3AECtVgMA5HK5UT+5XI6MjIxKx1GpVM9cgznHoIrVt32r1ZVAp9OZ7PO49LFZ+gCo8Thi5tHqtPXuzxGof5/d2lbZ/lUqlSbfJzrE3nrrLfzrX/9CSEgIvLy8IAgCwsPD8fDhQ6jVagQFBWHWrFnVq/r/vPPOO0hJScHhw4dhZ2dn1Pbn6296vd7kyiBV/cJVUalUNR6DKlYf9+31X9Lg4uJiso+9nb1Z+gCo8Thi5nF1cYVS6VllLXVJffzs1qaa7F/RIdawYUPEx8dj3759+PrrryEIAh4/fowePXpg3LhxeOmll55p2amIiAgkJSUhOTkZ7dq1M2xXKBQAgMzMTMNSVwCQnZ1d7uiMiIjqp2qv2DFx4kRMnDjRLJMvWbIESUlJOHjwIDp16mTU5unpCYVCgRMnTuCFF14A8OThnOfOnUNkZKRZ5iciIml7pmWnrl27Zrid3t3dHV27dq32UdjChQuxd+9efPXVV5DJZIZrYC4uLnB1dYUgCAgLC8PatWuhVCrh5eWFmJgYuLi4YMKECc9SNhER1THVCrHExESsWLECDx48gF6vB/DkmlXr1q2xYsWKah2hbd26FQAMt88/tWTJEkRERAAA5s2bh8LCQixatMjwZeekpCQ0bty4OmUTEVEdJTrEdu7ciTlz5kCpVGLlypXw8vKCXq/Hv//9b+zYsQOzZs1CSUkJQkNDRY2n0Wiq7CMIAiIiIgyhRkRE9EeiQ2zdunXw8/PDwYMH4eTkZNQ2Y8YMjBw5EuvWrRMdYkRERDUl+svOv/32GyZOnFguwADAyckJkyZNwoMHD8xaHBERkSmiQ8zb29vkl4wfPHiAzp07m6UoIiIiMUSHWGRkJLZv3479+8s/MC8xMRE7duzA3//+d7MWR0REZIroa2Iff/wxmjdvjmnTpiE8PBzt27eHIAi4e/cusrKy0LFjR2zcuBEbN240vEcQBMTHx1ukcCIiItEhduvWLQiCYFg94+n1L0dHR7Rt2xbFxcW4ffu20XueZQUPIiIisUSH2NWrVy1ZBxERUbWJviZGRERkaxhiREQkWQwxIiKSLIYYERFJFkOMiIgkiyFGRESSJTrEevTogUOHDlXafvjwYfTo0cMsRREREYkhOsTS09Oh0+kqbdfpdIYHZRIREdWGap1ONLUCx507d/iwSiIiqlUmV+zYtWsXdu/ebXgdExOD7du3l+un0Whw48YNDB8+3PwVEhERVcJkiOl0OqjVasPrvLw8lJWVGfURBAGNGjXCq6++ivDwcMtUSUREVAGTITZjxgzMmDEDANC9e3dER0dj5MiRtVIYERFRVUQvAHzlyhVL1kFERFRtokPsqUePHuH+/fvIzc2FXq8v196/f3+zFEZERFQV0SGWm5uLJUuWYP/+/SgtLS3XrtfrIQgCHj58aNYCiYiIKiM6xN566y0cPHgQM2bMQP/+/SGTyWo8+ZkzZ/Dxxx/j559/RkZGBmJjYxEaGmpoDwsLM7o7EgD8/f1x7NixGs9NRETSJzrEjh07hlmzZuH999832+Q6nQ4+Pj6YMmUK3njjjQr7BAQEYPPmzYbXDg4OZpufiIikTXSIOTg4oGPHjmadPDAwEIGBgQCA2bNnV9jH0dERCoXCrPMSEVHdIHrFjtGjR+Po0aOWrKVC586dg5eXF/z8/DB37lxkZWXVeg1ERGSbRB+Jvfnmm5g2bRreeOMNTJs2De7u7rCzsyvXTy6Xm624oUOHIjg4GJ6enkhPT8eqVasQEhKCkydPwtHRscL3qFSqGs9rjjGoYvVt32p1JSbXHAWAx6WPzdIHQI3HETOPVqetd3+OQP377Na2yvavUqk0+T7RIebn5wdBEHD58mXEx8dX2s+cdyeOHz/e8N9du3aFr68vunXrhiNHjiAkJKTC91T1C1dFpVLVeAyqWH3ct9d/SYOLi4vJPvZ29mbpA6DG44iZx9XFFUqlZ5W11CX18bNbm2qyf0WH2OLFi00uAFwbWrVqhdatW+Pu3btWrYOIiGyD6BCLiIiwZB2i5OTkICMjgzd6EBERgGdYsQMASktLkZeXhyZNmsDe/pmGAABotVrDUVVZWRnu37+PK1euoGnTpmjatCmio6MREhIChUKB9PR0REZGQi6XY9SoUc88JxER1R3Vep7YpUuXMGbMGLRu3RpeXl44c+YMgCdHSC+99BJOnTpVrcl/+uknDBw4EAMHDkRhYSGioqIwcOBAfPDBB7Czs8ONGzfw8ssvw9/fH2FhYfDy8sK3337L55YRERGAahyJXbhwwXBUNHnyZOzYscPQ1rx5c2i1Wnz55ZcYNGiQ6MkHDBgAjUZTaXtSUpLosYiIqP4RfST297//HR07dsT58+exfPnycu0DBgzAv/71L7MWR0REZIroELt06RJeeeUVODk5VXiXYps2bYweoElERGRpokOsQYMGaNCg8u5qtRrOzs5mKYqIiEgM0SHm6+uLw4cPV9hWUlKCffv2oXfv3mYrjIiIqCqiQ+ztt9/G6dOnMWfOHFy9ehUA8Pvvv+PYsWMICQnBr7/+igULFlisUCIioj8TfXfiiy++iM2bN2PRokXYtWsXgCfP+9Lr9XjuueewdetW9OrVy2KFEhER/Vm1vqk8YcIEjBw5EidOnMC///1vlJWVoX379hgyZAhcXV0tVSMREVGFqr3cRqNGjRAUFGSJWoiIiKpF9DWxQ4cOYdGiRZW2L1q0qNIbP4iIiCxBdIh9/PHHKCgoqLS9qKgIGzZsMEtRREREYogOsRs3bsDX17fS9h49euDWrVtmKYqIiEgM0SH2+PFjFBYWVtpeWFiI4uJisxRFREQkhugQ8/HxwYEDB1BWVlauraysDAcOHIC3t7dZiyMiIjJFdIi98cYbuHjxIqZMmYLLly+juLgYxcXFuHz5Ml5++WVcvHgRs2bNsmStRERERkTfYj9+/Hj8+uuviIqKwtGjRwEAgiBAr9dDEAQsWbIEkyZNslihREREf1at74ktXLgQEyZMQHJyMlJTU6HX69G+fXsEBwejXbt2FiqRiIioYqJCrLi4GElJSejUqRP8/Pzw5ptvWrouIiKiKom6Jubo6Ih58+YZFv4lIiKyBaJv7FAqlXzoJRER2RTRIbZ48WJs2bIF169ft2Q9REREoom+seP06dOQy+UYOHAgevfujfbt25d7krMgCIiJiTF7kURERBURHWLbtm0z/HdKSgpSUlLK9WGIERFRbRJ9OjE3N7fKn4cPH1Zr8jNnzmDy5Mno0qULZDIZdu7cadSu1+sRFRUFb29vtGzZEkFBQbh582a15iAiorpLdIhZgk6ng4+PD6Kjo8udmgSADRs2IDY2FqtXr8bx48chl8sxduxYPHr0yArVEhGRral2iKWkpODDDz/EokWLcOfOHQBPwujixYvIz8+v1liBgYFYvnw5Ro8ejQYNjEvR6/WIi4vD/PnzMXr0aPj4+CAuLg5arRYJCQnVLZuIiOog0SFWUlKCV155BSNHjkRUVBQ+++wz/PbbbwAAOzs7TJgwAf/4xz/MVlhaWhrUajUGDx5s2Obs7Ix+/frh/PnzZpuHiIikS/SNHVFRUThy5AjWrFmDQYMGoVevXoY2JycnjBkzBt988w0WLlxolsKefidNLpcbbZfL5cjIyKj0fSqVqsZzm2MMqlh927daXQl0Op3JPo9LH5ulD4AajyNmHq1OW+/+HIH699mtbZXtX6VSafJ9okNs3759eO211zBt2rQKb+BQKpU4cOCA2OFEEwTB6PXTBYcrU9UvXBWVSlXjMahidXHfqrNykZ1b+Wn0hg0BFxcXk2PY29mbpQ9Q87nEzOPq4gql0rPKWuqSuvjZtSU12b+iQywrKwvdunWrtN3R0VHUvxTFUigUAIDMzEy0bdvWsD07O7vc0RmRtWTn5iNy485K2+dPHVuL1RDVP6KviSkUCqSmplbafvHiRXh6mu9fZ56enlAoFDhx4oRhW1FREc6dO4c+ffqYbR4iIpIu0SEWEhKCzz//3HBHIvD/p/q++eYb7Nu3D+PGjavW5FqtFleuXMGVK1dQVlaG+/fv48qVK7h37x4EQUBYWBjWr1+PAwcO4MaNG5g9ezZcXFwwYcKEas1DRER1k+jTiUuWLMHp06cxaNAg9OnTB4IgYN26dYiMjMSlS5fg5+eHefPmVWvyn376CcHBwYbXUVFRiIqKwpQpUxAXF4d58+ahsLAQixYtgkajgZ+fH5KSktC4ceNqzUNERHWT6BBr3Lgxvv32W8TGxuLrr7+Gk5MTUlJS0L59e0RERODNN9+Ek5NTtSYfMGAANBpNpe2CICAiIgIRERHVGpeIiOqHaj3Z2cnJCQsWLMCCBQssVQ8REZFoVYZYcXExDh06hNTUVDRr1gzDhw9Hy5Yta6M2IiIik0yGmFqtxsiRI/Hrr79Cr9cDABo1aoT4+Hj079+/VgokIiKqjMm7E1etWoXU1FTMnj0be/fuRVRUFJycnLB48eLaqo+IiKhSJo/Ejh8/jilTpmDVqlWGbS1atMD06dPx22+/oU2bNhYvkIiIqDImj8TUanW5Lxb37dsXer0e9+/ft2hhREREVTEZYqWlpeVum3/6uqioyHJVERERiVDl3Ympqam4ePGi4fXTZ4apVCq4urqW6+/n52fG8ojI1ggCcP2XNJN93Jo2gULetJYqovqsyhB7uorGn/355o6nq8tXtMI9EdUdeY8KsH7bfpN9ls8NZYhRrTAZYrGxsbVVBxERUbWZDLGXX365tuogIiKqNtGr2BMREdkahhgREUkWQ4yIiCSLIUZERJLFECMiIsmq1vPEiIjE4BeiqbYwxIjI7PiFaKotPJ1IRESSxRAjIiLJYogREZFk2XSIRUVFQSaTGf106tTJ2mUREZGNsPkbO5RKJQ4ePGh4bWdnZ8VqiIjIlth8iNnb20OhUFi7DCIiskE2fToRePJQzi5duqB79+6YOnUqUlNTrV0SERHZCJs+EvP398emTZugVCqRnZ2NNWvWIDAwECkpKWjWrFmF71GpVDWe1xxjUMXq2r7V6kqg0+kqbX9c+thkuzn7AKjxOLVZr1anldTnQUq1SlFl+1epVJp8n02H2LBhw4xe+/v7w9fXF7t27cKcOXMqfE9Vv3BVVCpVjcegitXFfXv9lzS4uLhU2m5vZ2+y3Zx9ANR4nNqs19XFFUqlp8k+tqIufnZtSU32r82fTvwjV1dXeHt74+7du9YuhYiIbICkQqyoqAgqlYo3ehAREQAbP524bNky/OUvf0Hbtm0N18QKCgowZcoUa5dGREQ2wKZD7MGDB5g+fTpycnLg5uYGf39/HD16FB4eHtYujYiIbIBNh9i2bdusXQIREdkwSV0TIyIi+iObPhIjsiZ1Vi6yc/NN9ikuKamlaoioIgwxokpk5+YjcuNOk33mTx1bS9UQUUV4OpGIiCSLIUZERJLFECMiIsliiBERkWQxxIiISLIYYkREJFkMMSIikiyGGBERSRZDjIiIJIshRkREksUQIyIiyWKIERGRZDHEiIhIsriKPdVLfMwKUd3AEKN6iY9ZIaobGGJEZBWCAFz/Ja3SdremTaCQN63FikiKGGJEZBV5jwqwftv+StuXzw1liFGVeGMHERFJFo/EiMgmVXW6EeApR5JIiG3duhUbN26EWq2Gt7c3oqKi0K9fP2uXRTaKdx7WDVWdbgR4ypEkEGJJSUkIDw/H2rVr0bdvX2zduhUTJ05ESkoK3N3drV0e2SDeeUhUf9j8NbHY2Fi8/PLLePXVV9G5c2esWbMGCoUC27Zts3ZpRERkZYJGo9Fbu4jKlJSUoFWrVvjss88wZswYw/aFCxfixo0bOHTokBWrIyIia7PpI7GcnByUlpZCLpcbbZfL5cjMzLRSVUREZCtsOsSeEgTB6LVery+3jYiI6h+bDrHmzZvDzs6u3FFXdnZ2uaMzIiKqf2w6xBwcHODr64sTJ04YbT9x4gT69OljpaqIiMhW2Pwt9n/7298wa9Ys+Pn5oU+fPti2bRt+//13vP7669YujYiIrMymj8QAYNy4cYiKisKaNWswYMAApKSkID4+Hh4eHmab44svvsCoUaPg4eEBmUyGtLTyqwRoNBrMnDkTHh4e8PDwwMyZM6HRaMxWQ30SFBQEmUxm9DN16lRrlyUq1TXgAAAMbElEQVRZW7duRffu3aFQKDBo0CCcPXvW2iXVCVFRUeU+p506dbJ2WZJ15swZTJ48GV26dIFMJsPOncbf5dTr9YiKioK3tzdatmyJoKAg3Lx5s8pxbT7EAGD69Om4evUqMjMzcerUKfTv39+s4xcUFGDw4MEIDw83WcOVK1ewb98+JCQk4MqVK5g1a5ZZ66hPQkNDcfv2bcPPRx99ZO2SJOnpYgALFizA6dOn0bt3b0ycOBH37t2zdml1glKpNPqc8h8Iz06n08HHxwfR0dFwdnYu175hwwbExsZi9erVOH78OORyOcaOHYtHjx6ZHNfmTyfWhtmzZwMAfvrppwrbb9++jWPHjuHw4cOGa3EfffQRRowYAZVKBaVSWWu11hWNGjWCQqGwdhmS98fFAABgzZo1+O6777Bt2zasWLHCytVJn729PT+nZhIYGIjAwEAA//937lN6vR5xcXGYP38+Ro8eDQCIi4uDUqlEQkKCyctHkjgSs7YLFy7A1dXV6GaSvn37wsXFBefPn7diZdKVmJiIDh06oG/fvli2bFmV/9qi8kpKSnD58mUMHjzYaPvgwYP5uTST1NRUdOnSBd27d8fUqVORmppq7ZLqpLS0NKjVaqPPsrOzM/r161flZ5lHYiJkZmaiefPmRt9NEwQBbm5u/NL1M5g4cSLc3d3RsmVL3Lp1CytXrsS1a9fw9ddfW7s0SeFiAJbl7++PTZs2QalUIjs7G2vWrEFgYCBSUlLQrFkza5dXp6jVagCo8LOckZFh8r11NsRWrVqFmJgYk32Sk5MxYMAAUeNV9OVqfun6/1Vnf7/22muGbV27dkW7du0wZMgQXL58Gb6+vhautO7hYgCWMWzYMKPX/v7+8PX1xa5duzBnzhwrVVW3Pctnuc6GWFhYGF566SWTfdq2bStqrBYtWiA7O9toh+r1euTk5PBL1/+nJvu7Z8+esLOzw927dxli1cDFAGqXq6srvL29cffuXWuXUuc8ve6YmZlp9PeEmM9ynQ2x5s2bo3nz5mYZq3fv3tBqtbhw4YLhutiFCxeg0+n4pev/U5P9ff36dZSWlvICejX9cTGAPy6QfeLECYSEhFixsrqpqKgIKpVK9NkbEs/T0xMKhQInTpzACy+8AODJ/j537hwiIyNNvrfOhlh1qNVqqNVq3LlzB8CTuxHz8vLg7u6Opk2bonPnzhg6dCjeeustbNiwAXq9Hm+99RaGDx/OOxOr6ddff0V8fDwCAwPRrFkz3L59G8uWLUP37t3Rt29fa5cnOVwMwHKWLVuGv/zlL2jbtq3hmlhBQQGmTJli7dIkSavVGo5iy8rKcP/+fVy5cgVNmzaFu7s7wsLCsHbtWiiVSnh5eSEmJgYuLi6YMGGCyXFt+lEstSUqKgqrV68utz02NhahoaEAgNzcXCxZsgTffPMNAGDEiBH48MMPIZPJarVWqbt//z5mzpyJmzdvQqfToU2bNggMDER4eDiaNuUTep/F1q1bsWHDBqjVanTp0gUffPCB2b9LWR9NnToVZ8+eRU5ODtzc3ODv74+lS5fC29vb2qVJ0vfff4/g4OBy26dMmYK4uDjo9XpER0fjiy++gEajgZ+fH2JiYuDj42NyXIYYERFJFr8nRkREksUQIyIiyWKIERGRZDHEiIhIshhiREQkWQwxIiKSLIYYUTW9+eabkMlkeOedd6xdClG9x++JEVVDYWEhOnfujPz8fMjlcty8eRP29lz4hshaeCRGVA0HDx5Efn4+AgMDkZWVhWPHjtXq/P/5z3+g1/PfnURPMcSIqmH37t2QyWTYtGkTnJ2dsWfPHkPb/v37IZPJcO3atXLvmzBhAv77v//b8Prx48dYt24devXqhRYtWsDb2xtLly5FUVGRoU9aWhpkMhm2bt2K5cuXw9vbGy1atEBeXh6ys7Mxf/58+Pn5oVWrVujatSumT5+OBw8elJs7ISEBvXr1gkKhQL9+/XDo0CEEBQUhKCjIqF9OTg7efvttdOnSBS1atECvXr3wxRdfGPVRq9V44403DLV07twZkyZNQlZW1rPuUqIa4XkQIpEyMjJw8uRJvPbaa3Bzc0NQUBCSk5Oh0Wggk8kwYsQINGnSBPHx8Xj++ecN78vMzMTJkyexYsUKw7aZM2fi8OHDmDdvHvr06YPbt2/j/fffR3p6Or788kujedeuXYuePXti/fr1KC0thaOjI7KysuDo6Ijly5fDzc0Nv//+Oz755BMMHz4cP/74I5ycnAA8WdF+xowZGDFiBFatWoWcnBxERESguLgYHTt2NMyRn5+P4cOHo6ioCOHh4fD09MR3332Ht99+G8XFxZg1axYAYNasWbh37x4iIyPRpk0bZGVl4dSpUygoKLDkrieqFEOMSKS9e/eirKwMkydPBvBk4dKEhAQkJSVh6tSpcHJywpgxY5CQkID33nsPDRo8OdGRkJAAvV6PiRMnAgDOnj2LpKQkxMXFGVZEDwgIQNOmTTFz5kxcuXIF3bt3N8wrl8uxc+dOo4cDKpVKo0WrS0tL0adPHzz//PM4evSoYaHVqKgoeHt7G73fx8cHAQEBRiH26aef4t69ezh79qxhe0BAAPLy8rB69WpMmzYN9vb2+PHHH/Huu+8aPTvuj4+BIaptPJ1IJNKePXvQsWNH9O7dG8CTv+RbtWpldEpx0qRJePDgAU6fPm3YtnfvXgQEBKBly5YAgO+++w4ODg4ICQnB48ePDT+DBw8G8CTk/igoKKjCp9t+9tln6N+/P9q0aYPmzZsbjv6ePlKotLQUP/30E4KDg43e7+vrC09PT6OxvvvuO/j5+cHT09OopiFDhuDhw4e4desWgCcPMP34448RFxeH69ev8/ocWR1DjEiES5cu4datWwgODoZGo4FGo8GjR48watQoXLhwwRAc/fr1g4eHhyHYbt++jZ9//hmTJk0yjJWVlYWSkhK0adMGbm5uhh8vLy8AwMOHD43mfhp+f7R582YsWLAAAQEB+PLLL3H8+HHDTSZPr6vl5OTgP//5T4VPxm3RooXR66ysLJw9e9aoHjc3N7z66qtGNX3++ecYMWIENm7ciP79+6NLly5YvXo1ysrKqr9TicyApxOJRNi9ezcAYP369Vi/fn259j179mDZsmUQBAEvvfQSPv30UxQUFGDv3r1wdXXFqFGjDH2bNWsGJycnw7Pp/uzPoVXRUVhSUhIGDRqE999/37AtNTXVqE/z5s3RsGHDCm+6+PNj4Js1awa5XI7o6OgKa3oasHK5HDExMYiJiYFKpcLu3bsRFRUFNzc3TJs2rcL3ElkSj8SIqlBSUoLExET4+/sjOTm53E+3bt2wZ88ew6m1yZMnQ6vVIjk5GfHx8QgODkajRo0M4w0ZMgRFRUXIz89Hz549y/20atWqypoKCgrQsGFDo207d+40em1nZ4eePXsiOTnZ6LTf5cuXkZaWZtR3yJAh+OWXX9C2bdsKa2rcuHG5GpRKJZYvXw6ZTIabN29WvSOJLIBHYkRVOHz4MB4+fIhVq1ZhwIAB5dpff/11vP322/j+++8xcOBAeHl5wd/fHytXrsSDBw8MN4I8NWDAAEyYMAF//etf8be//Q1+fn5o0KAB0tPT8e2332LlypWGI5/KDB06FOvXr8fatWvh5+eH06dP43/+53/K9YuIiMDYsWMRGhqK1157DTk5OYiOjoZCoTDceAIAs2fPxv79+zFixAjMnj0bXl5eKCgogEqlwtmzZ7F7927k5eVhzJgxmDhxIjp16oSGDRvin//8JzQaDV588cVn3LtENcMQI6rC7t270bhx40rvwhs/fjyWLl2K3bt3Y+DAgQCe3OCxaNEitG7dusLg+8c//oHNmzfjq6++wtq1a+Ho6Ah3d3cMGTKkwmtYf7Z48WLk5eVh06ZNKC4uRv/+/ZGYmAhfX1+jfi+++CK2bNmC1atX45VXXkGHDh2watUqfPjhh2jSpImh33PPPYdvv/0Wq1evxvr165GRkYHnnnsOSqXScKejk5MTevTogR07duDevXto0KABvLy8sGXLlnLfOSOqLVx2iqie+e233/DCCy9gwYIFWLx4sbXLIaoRHokR1WGFhYVYunQpBg0ahObNmyM1NRUbN26Es7Mz/vrXv1q7PKIaY4gR1WF2dnZQq9VYvHgxHj58iEaNGuG//uu/8MUXX1R46z6R1PB0IhERSRZvsSciIsliiBERkWQxxIiISLIYYkREJFkMMSIikiyGGBERSdb/Ajm6wlvdzUB9AAAAAElFTkSuQmCC\n",
"text/plain": [
"
"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"averages.hist(bins=np.arange(-10,10,.5))\n",
"#most of the averages are between -4 and 4 pounds\n",
"#it seems like our ouserved statistic is not random\n",
"#since our simulated values are far away!\n",
"\n",
"#lets try to compute the p-value\n",
"\n",
"np.count_nonzero(hold<=difference_of_averages(births))\n",
"#The p-value is zero!!\n",
"#Since there are no simulated averages that are less then -9.16\n",
"#There for the p-value is extermely low and smoking does have an effect!"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"