{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# HIDDEN\n",
    "from datascience import *\n",
    "%matplotlib inline\n",
    "import matplotlib.pyplot as plots\n",
    "plots.style.use('fivethirtyeight')\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# HIDDEN\n",
    "\n",
    "def standard_units(any_numbers):\n",
    "    \"\"\"Convert any array of numbers to standard units.\n",
    "\n",
    "    Each value becomes its deviation from the mean of the input,\n",
    "    divided by the standard deviation of the input (np.std with its\n",
    "    default settings).\n",
    "    \"\"\"\n",
    "    center = np.mean(any_numbers)\n",
    "    spread = np.std(any_numbers)\n",
    "    return (any_numbers - center) / spread\n",
    "\n",
    "def correlation(t, x, y):\n",
    "    \"\"\"Return the correlation between columns x and y of table t.\n",
    "\n",
    "    Both columns are converted to standard units; the result is the\n",
    "    mean of their element-wise products.\n",
    "    \"\"\"\n",
    "    x_in_su = standard_units(t.column(x))\n",
    "    y_in_su = standard_units(t.column(y))\n",
    "    return np.mean(x_in_su * y_in_su)\n",
    "\n",
    "def slope(table, x, y):\n",
    "    \"\"\"Return correlation(table, x, y) * SD(y) / SD(x).\n",
    "\n",
    "    This is the slope of the regression line that predicts column y\n",
    "    from column x.\n",
    "    \"\"\"\n",
    "    r = correlation(table, x, y)\n",
    "    sd_y = np.std(table.column(y))\n",
    "    sd_x = np.std(table.column(x))\n",
    "    return r * sd_y / sd_x\n",
    "\n",
    "def intercept(table, x, y):\n",
    "    \"\"\"Return mean(y) - slope(table, x, y) * mean(x).\n",
    "\n",
    "    This is the intercept of the regression line that predicts\n",
    "    column y from column x.\n",
    "    \"\"\"\n",
    "    mean_y = np.mean(table.column(y))\n",
    "    mean_x = np.mean(table.column(x))\n",
    "    return mean_y - slope(table, x, y) * mean_x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "galton = Table.read_table('galton.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [ "
family | father | mother | midparentHeight | children | childNum | gender | childHeight | \n", "
---|---|---|---|---|---|---|---|
1 | 78.5 | 67 | 75.43 | 4 | 1 | male | 73.2 | \n", "
1 | 78.5 | 67 | 75.43 | 4 | 2 | female | 69.2 | \n", "
1 | 78.5 | 67 | 75.43 | 4 | 3 | female | 69 | \n", "
1 | 78.5 | 67 | 75.43 | 4 | 4 | female | 69 | \n", "
2 | 75.5 | 66.5 | 73.66 | 4 | 1 | male | 73.5 | \n", "
2 | 75.5 | 66.5 | 73.66 | 4 | 2 | male | 72.5 | \n", "
2 | 75.5 | 66.5 | 73.66 | 4 | 3 | female | 65.5 | \n", "
2 | 75.5 | 66.5 | 73.66 | 4 | 4 | female | 65.5 | \n", "
3 | 75 | 64 | 72.06 | 2 | 1 | male | 71 | \n", "
3 | 75 | 64 | 72.06 | 2 | 2 | female | 68 | \n", "
... (924 rows omitted)