{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# HIDDEN\n",
    "from datascience import *\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plots\n",
    "plots.style.use('fivethirtyeight')\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# HIDDEN\n",
    "\n",
    "def standard_units(any_numbers):\n",
    "    \"Convert any array of numbers to standard units.\"\n",
    "    return (any_numbers - np.mean(any_numbers))/np.std(any_numbers)  \n",
    "\n",
    "def correlation(t, x, y):\n",
    "    return np.mean(standard_units(t.column(x))*standard_units(t.column(y)))\n",
    "\n",
    "def slope(table, x, y):\n",
    "    r = correlation(table, x, y)\n",
    "    return r * np.std(table.column(y))/np.std(table.column(x))\n",
    "\n",
    "def intercept(table, x, y):\n",
    "    a = slope(table, x, y)\n",
    "    return np.mean(table.column(y)) - a * np.mean(table.column(x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "galton = Table.read_table('galton.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "    <thead>\n",
       "        <tr>\n",
       "            <th>family</th> <th>father</th> <th>mother</th> <th>midparentHeight</th> <th>children</th> <th>childNum</th> <th>gender</th> <th>childHeight</th>\n",
       "        </tr>\n",
       "    </thead>\n",
       "    <tbody>\n",
       "        <tr>\n",
       "            <td>1     </td> <td>78.5  </td> <td>67    </td> <td>75.43          </td> <td>4       </td> <td>1       </td> <td>male  </td> <td>73.2       </td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "        <tr>\n",
       "            <td>1     </td> <td>78.5  </td> <td>67    </td> <td>75.43          </td> <td>4       </td> <td>2       </td> <td>female</td> <td>69.2       </td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "        <tr>\n",
       "            <td>1     </td> <td>78.5  </td> <td>67    </td> <td>75.43          </td> <td>4       </td> <td>3       </td> <td>female</td> <td>69         </td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "        <tr>\n",
       "            <td>1     </td> <td>78.5  </td> <td>67    </td> <td>75.43          </td> <td>4       </td> <td>4       </td> <td>female</td> <td>69         </td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "        <tr>\n",
       "            <td>2     </td> <td>75.5  </td> <td>66.5  </td> <td>73.66          </td> <td>4       </td> <td>1       </td> <td>male  </td> <td>73.5       </td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "        <tr>\n",
       "            <td>2     </td> <td>75.5  </td> <td>66.5  </td> <td>73.66          </td> <td>4       </td> <td>2       </td> <td>male  </td> <td>72.5       </td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "        <tr>\n",
       "            <td>2     </td> <td>75.5  </td> <td>66.5  </td> <td>73.66          </td> <td>4       </td> <td>3       </td> <td>female</td> <td>65.5       </td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "        <tr>\n",
       "            <td>2     </td> <td>75.5  </td> <td>66.5  </td> <td>73.66          </td> <td>4       </td> <td>4       </td> <td>female</td> <td>65.5       </td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "        <tr>\n",
       "            <td>3     </td> <td>75    </td> <td>64    </td> <td>72.06          </td> <td>2       </td> <td>1       </td> <td>male  </td> <td>71         </td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "        <tr>\n",
       "            <td>3     </td> <td>75    </td> <td>64    </td> <td>72.06          </td> <td>2       </td> <td>2       </td> <td>female</td> <td>68         </td>\n",
       "        </tr>\n",
       "    </tbody>\n",
       "</table>\n",
       "<p>... (924 rows omitted)</p"
      ],
      "text/plain": [
       "family | father | mother | midparentHeight | children | childNum | gender | childHeight\n",
       "1      | 78.5   | 67     | 75.43           | 4        | 1        | male   | 73.2\n",
       "1      | 78.5   | 67     | 75.43           | 4        | 2        | female | 69.2\n",
       "1      | 78.5   | 67     | 75.43           | 4        | 3        | female | 69\n",
       "1      | 78.5   | 67     | 75.43           | 4        | 4        | female | 69\n",
       "2      | 75.5   | 66.5   | 73.66           | 4        | 1        | male   | 73.5\n",
       "2      | 75.5   | 66.5   | 73.66           | 4        | 2        | male   | 72.5\n",
       "2      | 75.5   | 66.5   | 73.66           | 4        | 3        | female | 65.5\n",
       "2      | 75.5   | 66.5   | 73.66           | 4        | 4        | female | 65.5\n",
       "3      | 75     | 64     | 72.06           | 2        | 1        | male   | 71\n",
       "3      | 75     | 64     | 72.06           | 2        | 2        | female | 68\n",
       "... (924 rows omitted)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "galton"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.060366119036644697"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correlation(galton, 'father', 'mother')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def galton_multiple_mse(a, b, c):\n",
    "    y = galton.column('childHeight')\n",
    "    fitted = a*galton.column('father') + b*galton.column('mother') + c\n",
    "    return np.mean((y - fitted) ** 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "best_quad = minimize(galton_multiple_mse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([  0.36828233,   0.29050997,  22.64327978])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_quad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.266038538922719"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correlation(galton, 'father', 'childHeight')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.20132194862210062"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "correlation(galton, 'mother', 'childHeight')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([  0.36828233,   0.29050997,  22.64327978])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_quad"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "22.636240549589751"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "intercept(galton, 'midparentHeight', 'childHeight')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.63736089696947895"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "slope(galton, 'midparentHeight', 'childHeight')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6587923058163002"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_quad.item(0) + best_quad.item(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6372730487145057"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "best_quad.item(0) + best_quad.item(1)/1.08"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}