{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# HIDDEN\n", "from datascience import *\n", "%matplotlib inline\n", "import matplotlib.pyplot as plots\n", "plots.style.use('fivethirtyeight')\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# HIDDEN\n", "\n", "def standard_units(any_numbers):\n", " \"Convert any array of numbers to standard units.\"\n", " return (any_numbers - np.mean(any_numbers))/np.std(any_numbers) \n", "\n", "def correlation(t, x, y):\n", " return np.mean(standard_units(t.column(x))*standard_units(t.column(y)))\n", "\n", "def slope(table, x, y):\n", " r = correlation(table, x, y)\n", " return r * np.std(table.column(y))/np.std(table.column(x))\n", "\n", "def intercept(table, x, y):\n", " a = slope(table, x, y)\n", " return np.mean(table.column(y)) - a * np.mean(table.column(x))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "galton = Table.read_table('galton.csv')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
family father mother midparentHeight children childNum gender childHeight
1 78.5 67 75.43 4 1 male 73.2
1 78.5 67 75.43 4 2 female 69.2
1 78.5 67 75.43 4 3 female 69
1 78.5 67 75.43 4 4 female 69
2 75.5 66.5 73.66 4 1 male 73.5
2 75.5 66.5 73.66 4 2 male 72.5
2 75.5 66.5 73.66 4 3 female 65.5
2 75.5 66.5 73.66 4 4 female 65.5
3 75 64 72.06 2 1 male 71
3 75 64 72.06 2 2 female 68
\n", "

... (924 rows omitted)