{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datascience import *\n",
    "import numpy as np\n",
    "\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plots\n",
    "plots.style.use('fivethirtyeight')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Lecture 20"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Deflategate ##"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "football = Table.read_table('deflategate.csv')\n",
    "football.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "combined = (football.column('Blakeman')+football.column('Prioleau'))/2\n",
    "football = football.drop('Blakeman', 'Prioleau').with_column(\n",
    "    'Combined', \n",
    "    combined)\n",
    "football.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.ones(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "initial_pressure = np.append(12.5 * np.ones(11), 13 * np.ones(4))\n",
    "initial_pressure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "drop_values = initial_pressure - football.column(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "football = football.drop('Combined').with_column('Drop', drop_values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "football.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "means = football.group('Team', np.average)\n",
    "means"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "observed_difference = means.column(1).item(0) - means.column(1).item(1)\n",
    "observed_difference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def diff_between_means(tbl):\n",
    "    means = tbl.group('Team', np.average).column(1)\n",
    "    return means.item(0) - means.item(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "drops = football.select('Drop')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "shuffled_drops = drops.sample(with_replacement = False).column(0)\n",
    "shuffled_drops"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "simulated_football = football.with_column('Drop', shuffled_drops)\n",
    "simulated_football.show(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "diff_between_means(simulated_football)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "differences = make_array()\n",
    "\n",
    "for i in np.arange(5000):\n",
    "    shuffled_drops = drops.sample(with_replacement = False).column(0)\n",
    "    simulated_football = football.with_column('Drop', shuffled_drops)\n",
    "    new_diff = diff_between_means(simulated_football)\n",
    "    differences = np.append(differences, new_diff)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Table().with_column('Difference Between Means', differences).hist()\n",
    "plots.scatter(observed_difference, 0, color='red', s=40);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.average(differences <= observed_difference)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Analyzing RCTs ##"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Botulinum Toxin A (bta) as a treatment to chronic back pain\n",
    "# 15 in the treatment group\n",
    "# 16 in the control group (normal saline)\n",
    "# trials were run double-blind (neither doctors nor patients knew which group they were in)\n",
    "# Result = 1 indicates pain relief\n",
    "bta = Table.read_table('bta.csv')\n",
    "bta.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bta.group('Group', sum)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bta.group('Group', np.average)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "observed_outcomes = Table.read_table('observed_outcomes.csv')\n",
    "observed_outcomes.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bta.group('Group', np.average).column(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "abs(0.125 - 0.6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def distance_between_group_proportions(tbl):\n",
    "    proportions = tbl.group('Group', np.average).column(1)\n",
    "    return abs(proportions.item(1) - proportions.item(0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "observed_distance = distance_between_group_proportions(bta)\n",
    "observed_distance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = bta.select('Group')\n",
    "results = bta.select('Result')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Repeat\n",
    "distances = make_array()\n",
    "for i in np.arange(2000):\n",
    "    shuffled_results = results.sample(with_replacement=False).column(0)\n",
    "    simulated = labels.with_column('Shuffled results', shuffled_results)\n",
    "    distance = distance_between_group_proportions(simulated)\n",
    "    distances = np.append(distances, distance)\n",
    "\n",
    "distances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Table().with_column('Distance', distances).hist(bins = np.arange(0, 1, 0.1))\n",
    "plots.scatter(observed_distance, 0, color='red', s=40);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.average(distances >= observed_distance)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}