{
 "metadata": {
  "name": "",
  "signature": "sha256:9ee31b1ba23d6574c7a962640eb79c5527b3a92f67cc238fadd74a426f142714"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Predicting Student Performance\n",
      "\n",
      "### A data science experiment using data from the KDD 2010 Educational Data Mining Challenge\n",
      "\n",
      "The aim of this IPython Notebook is to show how we can use Python to build predictive algorithms that solve data science problems in the arena of education.\n",
      "\n",
      "**This notebook is still heavily under construction**"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd\n",
      "import numpy as np\n",
      "import matplotlib.pyplot as plt"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Get the data: Algebra 2005-2006\n",
      "train_filepath = 'data/algebra0506/algebra_2005_2006_train.txt'\n",
      "test_filepath  = 'data/algebra0506/algebra_2005_2006_test.txt'\n",
      "traindata = pd.read_table(train_filepath)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "More information about the data format can be found on the [challenge website](https://pslcdatashop.web.cmu.edu/KDDCup/rules_data_format.jsp)."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Inspect some of the training data\n",
      "traindata.head()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
        "<table border=\"1\" class=\"dataframe\">\n",
        "  <thead>\n",
        "    <tr style=\"text-align: right;\">\n",
        "      <th></th>\n",
        "      <th>Row</th>\n",
        "      <th>Anon Student Id</th>\n",
        "      <th>Problem Hierarchy</th>\n",
        "      <th>Problem Name</th>\n",
        "      <th>Problem View</th>\n",
        "      <th>Step Name</th>\n",
        "      <th>Step Start Time</th>\n",
        "      <th>First Transaction Time</th>\n",
        "      <th>Correct Transaction Time</th>\n",
        "      <th>Step End Time</th>\n",
        "      <th>Step Duration (sec)</th>\n",
        "      <th>Correct Step Duration (sec)</th>\n",
        "      <th>Error Step Duration (sec)</th>\n",
        "      <th>Correct First Attempt</th>\n",
        "      <th>Incorrects</th>\n",
        "      <th>Hints</th>\n",
        "      <th>Corrects</th>\n",
        "      <th>KC(Default)</th>\n",
        "      <th>Opportunity(Default)</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td> 1</td>\n",
        "      <td> 0BrbPbwCMz</td>\n",
        "      <td> Unit ES_04, Section ES_04-1</td>\n",
        "      <td> EG4-FIXED</td>\n",
        "      <td> 1</td>\n",
        "      <td> 3(x+2) = 15</td>\n",
        "      <td> 2005-09-09 12:24:35.0</td>\n",
        "      <td> 2005-09-09 12:24:49.0</td>\n",
        "      <td> 2005-09-09 12:25:15.0</td>\n",
        "      <td> 2005-09-09 12:25:15.0</td>\n",
        "      <td>  40</td>\n",
        "      <td> NaN</td>\n",
        "      <td> 40</td>\n",
        "      <td> 0</td>\n",
        "      <td> 2</td>\n",
        "      <td> 3</td>\n",
        "      <td> 1</td>\n",
        "      <td> [SkillRule: Eliminate Parens; {CLT nested; CLT...</td>\n",
        "      <td>    1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td> 2</td>\n",
        "      <td> 0BrbPbwCMz</td>\n",
        "      <td> Unit ES_04, Section ES_04-1</td>\n",
        "      <td> EG4-FIXED</td>\n",
        "      <td> 1</td>\n",
        "      <td>     x+2 = 5</td>\n",
        "      <td> 2005-09-09 12:25:15.0</td>\n",
        "      <td> 2005-09-09 12:25:31.0</td>\n",
        "      <td> 2005-09-09 12:25:31.0</td>\n",
        "      <td> 2005-09-09 12:25:31.0</td>\n",
        "      <td>  16</td>\n",
        "      <td>  16</td>\n",
        "      <td>NaN</td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0</td>\n",
        "      <td> 1</td>\n",
        "      <td> [SkillRule: Remove constant; {ax+b=c, positive...</td>\n",
        "      <td> 1~~1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td> 3</td>\n",
        "      <td> 0BrbPbwCMz</td>\n",
        "      <td> Unit ES_04, Section ES_04-1</td>\n",
        "      <td>      EG40</td>\n",
        "      <td> 1</td>\n",
        "      <td>   2-8y = -4</td>\n",
        "      <td> 2005-09-09 12:25:36.0</td>\n",
        "      <td> 2005-09-09 12:25:43.0</td>\n",
        "      <td> 2005-09-09 12:26:12.0</td>\n",
        "      <td> 2005-09-09 12:26:12.0</td>\n",
        "      <td>  36</td>\n",
        "      <td> NaN</td>\n",
        "      <td> 36</td>\n",
        "      <td> 0</td>\n",
        "      <td> 2</td>\n",
        "      <td> 3</td>\n",
        "      <td> 1</td>\n",
        "      <td> [SkillRule: Remove constant; {ax+b=c, positive...</td>\n",
        "      <td>    2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td> 4</td>\n",
        "      <td> 0BrbPbwCMz</td>\n",
        "      <td> Unit ES_04, Section ES_04-1</td>\n",
        "      <td>      EG40</td>\n",
        "      <td> 1</td>\n",
        "      <td>    -8y = -6</td>\n",
        "      <td> 2005-09-09 12:26:12.0</td>\n",
        "      <td> 2005-09-09 12:26:34.0</td>\n",
        "      <td> 2005-09-09 12:26:34.0</td>\n",
        "      <td> 2005-09-09 12:26:34.0</td>\n",
        "      <td>  22</td>\n",
        "      <td>  22</td>\n",
        "      <td>NaN</td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0</td>\n",
        "      <td> 1</td>\n",
        "      <td> [SkillRule: Remove coefficient; {ax+b=c, divid...</td>\n",
        "      <td> 1~~1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td> 5</td>\n",
        "      <td> 0BrbPbwCMz</td>\n",
        "      <td> Unit ES_04, Section ES_04-1</td>\n",
        "      <td>      EG40</td>\n",
        "      <td> 2</td>\n",
        "      <td>  -7y-5 = -4</td>\n",
        "      <td> 2005-09-09 12:26:38.0</td>\n",
        "      <td> 2005-09-09 12:28:36.0</td>\n",
        "      <td> 2005-09-09 12:28:36.0</td>\n",
        "      <td> 2005-09-09 12:28:36.0</td>\n",
        "      <td> 118</td>\n",
        "      <td> 118</td>\n",
        "      <td>NaN</td>\n",
        "      <td> 1</td>\n",
        "      <td> 0</td>\n",
        "      <td> 0</td>\n",
        "      <td> 1</td>\n",
        "      <td> [SkillRule: Remove constant; {ax+b=c, positive...</td>\n",
        "      <td> 3~~1</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>5 rows \u00d7 19 columns</p>\n",
        "</div>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 3,
       "text": [
        "   Row Anon Student Id            Problem Hierarchy Problem Name  \\\n",
        "0    1      0BrbPbwCMz  Unit ES_04, Section ES_04-1    EG4-FIXED   \n",
        "1    2      0BrbPbwCMz  Unit ES_04, Section ES_04-1    EG4-FIXED   \n",
        "2    3      0BrbPbwCMz  Unit ES_04, Section ES_04-1         EG40   \n",
        "3    4      0BrbPbwCMz  Unit ES_04, Section ES_04-1         EG40   \n",
        "4    5      0BrbPbwCMz  Unit ES_04, Section ES_04-1         EG40   \n",
        "\n",
        "   Problem View    Step Name        Step Start Time First Transaction Time  \\\n",
        "0             1  3(x+2) = 15  2005-09-09 12:24:35.0  2005-09-09 12:24:49.0   \n",
        "1             1      x+2 = 5  2005-09-09 12:25:15.0  2005-09-09 12:25:31.0   \n",
        "2             1    2-8y = -4  2005-09-09 12:25:36.0  2005-09-09 12:25:43.0   \n",
        "3             1     -8y = -6  2005-09-09 12:26:12.0  2005-09-09 12:26:34.0   \n",
        "4             2   -7y-5 = -4  2005-09-09 12:26:38.0  2005-09-09 12:28:36.0   \n",
        "\n",
        "  Correct Transaction Time          Step End Time  Step Duration (sec)  \\\n",
        "0    2005-09-09 12:25:15.0  2005-09-09 12:25:15.0                   40   \n",
        "1    2005-09-09 12:25:31.0  2005-09-09 12:25:31.0                   16   \n",
        "2    2005-09-09 12:26:12.0  2005-09-09 12:26:12.0                   36   \n",
        "3    2005-09-09 12:26:34.0  2005-09-09 12:26:34.0                   22   \n",
        "4    2005-09-09 12:28:36.0  2005-09-09 12:28:36.0                  118   \n",
        "\n",
        "   Correct Step Duration (sec)  Error Step Duration (sec)  \\\n",
        "0                          NaN                         40   \n",
        "1                           16                        NaN   \n",
        "2                          NaN                         36   \n",
        "3                           22                        NaN   \n",
        "4                          118                        NaN   \n",
        "\n",
        "   Correct First Attempt  Incorrects  Hints  Corrects  \\\n",
        "0                      0           2      3         1   \n",
        "1                      1           0      0         1   \n",
        "2                      0           2      3         1   \n",
        "3                      1           0      0         1   \n",
        "4                      1           0      0         1   \n",
        "\n",
        "                                         KC(Default) Opportunity(Default)  \n",
        "0  [SkillRule: Eliminate Parens; {CLT nested; CLT...                    1  \n",
        "1  [SkillRule: Remove constant; {ax+b=c, positive...                 1~~1  \n",
        "2  [SkillRule: Remove constant; {ax+b=c, positive...                    2  \n",
        "3  [SkillRule: Remove coefficient; {ax+b=c, divid...                 1~~1  \n",
        "4  [SkillRule: Remove constant; {ax+b=c, positive...                 3~~1  \n",
        "\n",
        "[5 rows x 19 columns]"
       ]
      }
     ],
     "prompt_number": 3
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Let's begin by asking some basic questions of the data.\n",
      "\n",
      "### How many students are interacting with the system?"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Take the column of anonymized student IDs and count the number of unique entries\n",
      "print 'Number of students: ', len(np.unique(traindata['Anon Student Id']))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Number of students:  "
       ]
      },
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "574\n"
       ]
      }
     ],
     "prompt_number": 4
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### How long does it take a student to solve any problem step on average?"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": true,
     "input": [
      "csd = traindata['Correct Step Duration (sec)']\n",
      "csd.describe()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 5,
       "text": [
        "count    620129.000000\n",
        "mean         18.071478\n",
        "std          34.796694\n",
        "min           0.000000\n",
        "25%           5.000000\n",
        "50%           8.000000\n",
        "75%          18.000000\n",
        "max        1907.000000\n",
        "Name: Correct Step Duration (sec), dtype: float64"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "So, ignoring the steps that were not answered correctly on the first attempt (no correct step duration is recorded for them), the average duration of a problem step was about 18 seconds.\n",
      "\n",
      "Let's histogram this data to see the distribution."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "%matplotlib inline\n",
      "hist = plt.hist(np.array(csd.dropna()),bins=100,normed=True,log=False,range=(0,100))\n",
      "plt.xlabel('Time to correct answer (sec)')\n",
      "plt.ylabel('Fraction')\n",
      "plt.show()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "display_data",
       "png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEPCAYAAABcA4N7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF6hJREFUeJzt3U9sHNd9wPGvpMK+uDQlAS5UIEBIKbcCMWnltpeaNA9G\ngQKmKvfiWyg399Bm2kNg9OBVBbTHyNYxl8i2fCkgIBaVXPYQxPJSQpBTKCq9SK1hSdQfoJIKSj28\nN7vD0e7s7Iqzf78fgNiZ+e3MvB2S89v33sw8kCRJkiRJkiRJkiRJkqRRcqAP+1gG/hpYBOoF46vA\nEaDSZh1JUp/sL3n78/H1cnydKxBfjNMXgKPATGmlkyR1VHaiOAncjdNbNJNAXnwxTgNcb7GOJKmP\n/qLk7U8Dd1LzhwvEbwOH4vzB1LQkaQDKrlEA7Osy/gWhyQlgFvhuz0skSSqs7BrFNrtrB7cLxG8A\n5wn9Fds0m6Eavvzyy2dTU1NllFeSxtatW7euv/fee8e6Xa/sRHEeOE7orJ4BLsXl04Qk0Co+F5ed\nA94HvsxudGpqivn5+eziiVStVllbWxt0MYaCx6LJY9HksWiq1+tHO7/reWU3PW3E1wVCYrga59dz\n4huEfotl4GzJ5ZMkdVB2jQJCzQCal8BCqDHkxS+UWiJJUmH96MxWiSqVyqCLMDQ8Fk0eiyaPxYvr\ndEXSUFpfX39mH4Ukdader7O4uNj1ed8ahSQpl4lCkpSrH53ZQ+HW/cd8+/BJY/61V17iyNTLAyyR\nJI2GiUkU3z58wurFzcb8mbePmSgkqQCbniRJuSamRpF1YD9cu/kAsBlKkvJMbI3i3qMdVi9usnpx\nc1ffhSRpt4lNFJKkYkwUkqRcJgpJUi4ThSQpl4lCkpTLRCFJymWikCTl6scNd8uE0etmaQ5S1Cne\naR1JUp+UXaNIBo1IRq+bKxCfA7bisq0W60iS+qjsRHESuBunt4DFgvHT8XWW5rjakqQBKDtRTAN3\nUvOHC8Q3gBtx+R0kSQPVj87sTsPuZeOvApvACqF/YqaMQkmSiim7M3sbOBSnDwK3c+LTMX4K+AS4\nH+MngDPZDVer1cZ0pVJxAHVJyqjVatRqtcb80tJST9spO1GcB44TOqZngEtx+TQhCaTjs8A6oZ/i\nfnxfsvw5a2trpRVaksZB9kt0vV7vaTtlJ4oNQiJYICSGq3H5elyejW/En1VC5/YhvDxWkgaqH/dR\nJCf6y6llxzvEn2tq6kV6nOwnO0/3YpOSNHHGeoS79DjZP1+0T1ySeuEjPCRJuUwUkqRcJgpJUi4T\nhSQpl4lCkpTLRCFJyjXWl8cWdWA/XLv5AIDXXnmJI1MvD7hEkjQ8rFEA9x7tsHpxk9WLm40b9CRJ\ngYlCkpTLRCFJymWikCTlMlFIknKZKCRJuUwUkqRcJgpJUq5+JIplwgh2KwXj88BTYDP+nC27gJKk\n9sq+M3s+viZjX88RhjrNix+kmcDmgLsll1GSlKPsGsVJmif6LWCxQDw7ZOqfSyyfJKmDshPFNHAn\nNX+4i/gC8FlJ5ZIkFdSPPop9PcbfAu7tcVkkSV0qu49iGzgUpw8Ct7uIz5OjWq02piuVCpVK5YUK\nKknjplarUavVGvNLS0s9bafsRHGe0M9wGZgBLsXl04Qk0S4+22nDa2tre11WSRor2S/R9Xq9p+2U\n3fSUXOG0QEgMV+P8eof4M+B6yWWTJBXQj4GLzsXX7NVMefEbwE/KLJQkqRjvzJYk5TJRSJJymSgk\nSblMFJKkXCYKSVIuE4UkKZeJQpKUy0QhScplopAk5erHndkj5cB+uHbzQWP+tVde4sjUywMskSQN\nloki496jHT5av9GYP/P2MROFpIlm05MkKZeJQpKUy0QhScplopAk5TJRSJJy9eOqp2XC6HWzNAcp\n6hSfJwyNeqjNOpKkPim7RjEfX5PR6+YKxte
AC4SxtbPrSJL6qOxEcRK4G6e3gMUC8WXg67jsDM1x\ntSVJA1B2opgG7qTmDxeI/yi+zgGrpZZOktRRP/oo9vUQ/45Qk0hqGBeyb6hWq43pSqVCpVJ5gSJK\n0vip1WrUarXG/NLSUk/bKTtRbBM6pAEOArdz4tOp+I1U/Ee0SBRra2t7WlBJGjfZL9H1er2n7ZTd\n9HSecDUThKuYLsXp6Rbx2Rj/IrVsGvh9yWWUJOUoO1EkHdELhNrB1Ti/nhO/EaeXCbWNL0suoyQp\nRz/6KJL7IC6nlh3vEE+WPdfkJEnqL+/MliTlMlFIknKZKCRJuUwUkqRcJgpJUi4ThSQpl4lCkpTL\nRCFJytWPG+5G2oH9cO3mAwBee+Uljky9POASSVJ/WaPo4N6jHVYvbrJ6cZNvHz4ZdHEkqe9MFJKk\nXCYKSVIuE4UkKVfRzuw54DTNQYYSzwgDC0mSxlTRRPE5YQyJD+k8tKkkaYwUTRSHgH/qcR/LhIGI\nZmmOM9EpfpqQlFbarCNJ6pOifRQfAz/tYfvz8TUZlGiuYHwF+BNwvdsd3rr/mGs3H3Dt5gOe7Dzt\ndnVJUkbRRPGPwL8Bd4DN1M+fOqx3Ergbp7eAxYLxFeAHwG8Klq/h24dPGvc9PNl51u3qkqSMok1P\n/9Dj9qcJySVxuGD8EGEc7XngTI/7liTtgaKJYusF9tGp87tVPOmXeIuQMC63eI8kqQ+KJopXCR3M\nJwm1gG3gPKHD+X7Oets0L6k9CNzOiU/H+AqhlnEhzs/SIlFUq9XGdKVSoVKpFPwokjQZarUatVqt\nMb+0tNTTdoomiuQb/hvADcJJ/Vz8eTdnvfPAccKJfga4FJenk00SnyVcgnsIuBLfdzi1zi5ra2sF\niy5Jkyn7Jbper/e0naKJ4gShRnAvzm/T/Oaflyg2CIlgIa5zNS5fj8uz8Y0YX46v36XWkSQNQNFE\ncYNQm0hfhfQGxfouktpIuvnoeIf4hYLlkiSVrGii+JBQC/iEkByOAqfo/WooSdKIKHofxRfAMULN\n4jDhRrhj+M1fksZeNyPcbRFuupMkTZBOieIKodnpdJu4T4+VpDHXKVGcIjQzrdD6xjifkSFJY65T\nokguul3n+cdvAOwAB/a0RJKkodIpUTxtM534Yg/LMvQO7IdrNx8A8NorL3Fk6uUBl0iSytcpUSRX\nRV0iPHdpot17tMNH6zcAOPP2MROFpIlQ9PLYt4B3gNfj/DLw41JKJEkaKkUTxVnCpbFJh/YN4GfA\nL8oolCRpeBRNFCcJtYrkWUz1OP9+GYWSJA2PooniLuFR42mvEh7kJ0kaY9086+k3PP+spw9LKpck\naUh086yn44SaxRtx2Rs0n/wqSRpTL/qspzfZ/ehxSdKYKZooZggd10k/xT7CSHSLNIcybWeZ0Jcx\nS+saSF58FThTsIySpBIUbXr6nHAi30/on6jH+U7jUczH12RQorku4ot4k58kDVzRRDFPeDBgcjns\np4QTebXDeicJ/RoQmq4Wu4j7wEFJGgJFE8U28P04fQiYisuOdlhvmjCudiL7YMF28Tl2D40qSRqQ\non0Ua4Sb7Q4STuC/IZzgrxRYt9XjyTvFO/V7SJL6pGii+BT4DLhHuHfiBCFpdLo8dpvmSf8gcDsn\nPh3j1iYkaYgUTRRXCMkhuRO76OPFzxPuv7hMuHLqUlw+HbeVjs8Sxr2YjT+HCUlkjuajQxqq1Wb3\nSKVSoVKpFCySJE2GWq1GrVZrzC8tLfW0naKJ4jKhJvGTLre/QUgEC4TEcDUuX4/Ls/ENmklhhXA5\nbstO7bW1tS6LIkmTJfslul6v57y7vaKJYp5wMn+fcHVS4hnwgw7rJs1T6eak4x3iyXLv/JakASua\nKHxKbEZ6tDtwxDtJ46tTorhN6CtIahE+siNKj3YHjngnaXx1uo/iIOGeicQ6zfspJEkToOgNd5Kk\nCWWikCT
lKtKZPUu4dHVfaj7tz3tZIEnScCmSKLIX3q6npp8BB/auOJKkYdMpUdg0JUkTzkQgScpl\nopAk5TJRSJJymSgkSblMFJKkXCYKSVKuok+PVQfpp8n6JFlJ48QaxR6592iH1YubrF7c5NuHTwZd\nHEnaMyYKSVKufiSKZcLoeCtdxE/EZWfLLZokqZOyE8V8fE2GOZ0rEF+IP5cJDyB8vcwCSpLylZ0o\nTgJ34/QWsFggfhn4SVx2CLhachklSTnKvuppGriTmj9cMP4qcAr4uLyiSZKK6Ecfxb4e4veAM8D7\nwMyel0iSVFjZNYptQvMRhPG3b+fEp2M86cfYIIyFcYKQNHapVquN6UqlQqVS2bNCS9I4qNVq1Gq1\nxvzS0lJP2yk7UZwHjhP6HWaAS3H5NCFJpOOzhEGRFmkOljQN/L7VhtfW1kortCSNg+yX6Ho9Ow5d\nMWU3PW3E1wVCYkg6ptfbxDeATwlJY4XQ0f1lyWWUJOXoxyM8zsXXy6llx3Pi91LLJEkD5rOeSuBz\nnySNEx/hUQKf+yRpnJgoJEm5TBSSpFwmCklSLhOFJCmXiUKSlMtEIUnKZaKQJOXyhruSpW++A2/A\nkzR6TBQlu/doh4/WbzTmz7x9zEQhaaTY9CRJymWikCTlMlFIknKZKCRJuUwUkqRc/bjqaZkwet0s\nrQckahVfia9HAcc8laQBKrtGMR9fk9Hr5grEFwhDpZ4jJI+FMgvYb8l9FdduPuDW/ceDLo4kdVR2\nojhJGPcaYAtYLBCfTb1vK86PDQc1kjRqym56mgbupOYPF4ifSc3PA7/K28Gt+493nXCf7DztqaCS\npNb60Uexr8f4PPANcDVv5W8fPmH14mZj/ueLM10VTpKUr+xEsQ0citMHgdtdxBeAn7XbcLVaBeB/\nHjzh/tPvMXX09b0orySNjVqtRq1Wa8wvLS31tJ2yE8V54Dihs3oGuBSXTxOSRLv4KZpNUAs0O7sb\n1tbCxVDXbj7gj6kaxSjxgYGSylSpVKhUKo35er3e03bK7szeiK8LhMSQNCOt58QXgSqwSei/eFZy\nGQcm3bFt57akYdWPPork3oh0reB4TnydZnOUJGnAvDNbkpTLRCFJymWikCTlcoS7IZK+CsoroCQN\nC2sUQ8THe0gaRiYKSVIuE4UkKZd9FEPK/gpJw8IaxZCyv0LSsDBRSJJy2fQ0Anx4oKRBMlGMgHuP\ndvho/UZj/szbx0wUkvrGpidJUi4ThSQpl01PI8hLZyX1Uz9qFMuEgYlWuoyfLrNQo8xLZyX1U9mJ\nYj6+JoMSzRWMnyIkEHWQ1C6u3XzArfuPB10cSWOo7ERxErgbp7cIw5wWiX8a59WBtQtJZSs7UUwT\nxr1OHO4yLkkasH50Zu97wbgK8sY8SWUoO1FsA4fi9EHgdpfxtqrVKgD/8+AJ959+j6mjr79YScdA\n9sa8f/+7Y43mKJOGNHlqtRq1Wq0xv7S01NN2yk4U54HjhM7qGeBSXD5NSBLt4h2tra0B4Rv0Hy9u\n7l2Jx0g6cXg3tzR5KpUKlUqlMV+v13vaTtl9FBvxdYGQGK7G+fUO8ROEBPLjkss3Mbw6SlKv+tFH\ncS6+Xk4tO94h/kX80R6xdiGpV96ZPYHs9JbUDRPFBPJptJK6YaKQz46SlMtEoV01DC+plZRlotAu\n7ZIGmDikSWWiUFvewCcJTBTqgk1U0mQyUagnNlFJk8NEoRdmE5U03kwU2nPtahuvvHSAh092Gu8z\niUijwUShUqWTxs8XZ6x5SCPIRKGByevnSNc+TCLSYI1sovjt9TAw3isvjexHUEq2nyNd+7D5Shqs\nkT3Lfvzb/wLgX5dmB1wSla1o85W1EKkcI5soJGifRPKaskwoUndMFBpLeU1ZRRNKOoncuv/Ye0U0\nsfqRKJYJo9fN0hykqFO80zrSnijaN/Jk5yn/8uutxvvaNXll+1CsvWgcl
J0o5uPrZcJJf47m8Kft\n4vs6rKOUP1z5HfBXgy7GUNjrY5Ft1sqLtetDKdop3y7Z9Np5X6vVdo2VPMk8Fi+u7ERxEvgqTm8B\ni+w+6beKHwYu5ayjlD9c+R0c+/tBF2MoDPuxyOuUb5dsinbeZ+f/86vf8pezP+z4vkmoDZkoXlzZ\niWIauJOaP1wg3mkdaWIVTTY/eLLD6sXNju/rZ21oUO97+Lg5PWmyfWu96kcfxb4XjEsasL2oDQ3q\nfX/zfzuNERyHJXn1633ZvrXqPD0p+yRdJTQjXQZOADPAmTbxZUKfxOEO6/DLX/5y88iRI0dLLrsk\njZVbt25df++9944NuhxZc8BKnF4FXo/T0znxdutIkgZgf8nbTzqhFwiXu16N8+s58XbrSJIk9WQ1\nNb1M+JKx0ua90iQ4nZlv9X9R+H+l7BrFXvMkED77CqF/JzHJx2UReCtOp+/LgdCMOSnmCX8HPZ0I\nxswLnRTHwCnC5020+r/o6n9llBLFJJ8EEguEZrtzhI7/BZrHYVKPy7PU9LvA3Tid3IMzKdaAC4T+\nv65PBGNkjvC7vxxf55i8/5FPCZ89cZLn/y9OEpr208vaGqVE0erDTppZmp97K86/Sxe/8DEzR/Of\nH+BVJvMenBPA13H6DKGfb5L/X5Jml1nCsZjkLxCwB/erjVKi8Ea8UJNInn01D1whHJfbqfdM0nE5\n1GLZJN6Xc5zwe5+j2V8zqf8vG8ANwmdPPv+kHou0F/q/GKVEAZN5EmhlHviG5hVik3hcsrUJCDWr\nJHkcZHcCHXff0fx7SNqnJ/HvYhrYJPRFnCPchwWTeSwS6f+L5ItlV/8ro/SY8Uk+CWQtAD+L05N6\nXGZp3qB5iJA4zhO+XV8mnCAutV17vNwmfIuG8PfwIyb372IF+AS4TzgGJ5jcY5FI/1/MEv4v9tHF\n/8oo1SjOEz4kTNZJIOsUzTvVF5jc43Ih/jwj9E08Y3LvwfmC5t/ANPB7JvfvAkKSgHAS3GbyjsUJ\nQhL4cZyfuPvVVpicS9xaWSS0tW7G1zfj8kk/Lgq/+2Xg48yySfy7WOX5S4Un9VhIkiRJkiRJkiRJ\nkiRJkiRJkrr3DfC0xc8O4Y7oKyXu+2mJ2y7bKJcd4AP2/kmr2fsbJI2RKZpPZ30zzifLFkrc7yif\nbEe57NOU9wVgs6TtShoSd9g9jnnyFFsIj0n4Bjgb3/dVjF8nnDTTI9EtppZ/RUg4WZdiPHk2z4nU\nOp+1WSf9vjuxLNnl2fWT59+cjp9lJjPfqbyt9pcte9qp+N6nqf0l5fiGcJySO/DT3+g/oflU1A/i\nukmS/oDdiek6IaG3K3f2M2d9QPMRENl9F/09tvs9VNk9sI6kMdMpUTwF3iGcMJKTxBThhJacyJLH\nPv9tjJ0lnLhbSdZJtv1m3PZn7D75kHpfUsYZwsn2nQ7rJ7FfAN9vMZ9X3nb7S5c9bTou/2GqHMlo\nhcl+fxrnzxJOvhBOuptx/3Pxff+RWvcSoSnw+6l9JzXAduVOf8asK6nlrfY99QLHZYH2v29JY6BT\nokg/9/8su589lJxgTvH8ieIOrSUn2w8IJ7XETJt1Psjs83XCyS1v/eSkSZv5vPJm9zdH8/i0a3pK\nf+v+hN0JK/2Z0v0/y+yuYSRNfkn8LuHzrRC+5V+J0+3Knf2MWelYq31D98clWT/7OTXkRukx4xoN\n6RPANs+fEPYBRwnfUtOxZ+Q7xO7hHW8QvtFmJc03ieSpmO92WD8dy87nlTe7vw06+2fCST7ZXnpf\n6X2kx1C4QDgGn8fXjwlPEZ4nJL3rhGFy3yWMdrfeodzZ/aZN0xw1MW/fvR6XLVr/7jSkRukx4xoP\nzwjfTr8gnHSSn07DU94mnJgS2ZNZYjvzvnnCN+Ki67eSV952+2vnBCFJvAksxe0WMUs4+R8D3gDe\nj/upAx8Smp7WY+wt4FeE5NHtce5m3
3t5XCSNkU5NT+krWqrs7vjMtm0vxOlPaLbFZ2X7KJJ1Pmd3\nU1IiaVKai+tcJ3TKzuSsny13dj5p629V3nb7S5c9bSW1bnJlUbpdP73f9LFNOq9fTb3vHcIxTvqF\nIDRBJR3oecc5u6+sdNnb7bvX42LTkzTmWiWKr+P0LPCnVKxKs2MWQmfrVJxeIJxwngK/Ti3P+iyu\nB81vsU8Jg9G0W2eF5lVF6WTSbv1subPzncrbbn/psqd9Fd//Nc0mqDdb7Dd9bJP1nsb3J+3/i+w+\nrp/F8nUqd6vPmJa+GqvdvvO2D+2PyyLh+EuSRtgq5d0Yd5pmDUiSNKJexRvuFB0YdAEkDaXHhGTx\nv8B/7+F2lwlXQ9X3cJuSJEmSJEmSJEmSJEmSJKX8P/f22KDNqvhiAAAAAElFTkSuQmCC\n",
       "text": [
        "<matplotlib.figure.Figure at 0x1077a1a10>"
       ]
      }
     ],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "counts, bins = hist[0], hist[1]\n",
      "cdf = np.cumsum(counts)\n",
      "plt.plot(bins[1::], cdf)\n",
      "plt.xlabel('Time to correct answer (sec)')\n",
      "plt.ylabel('Cumulative fraction')\n",
      "plt.axis((0,100,0,1.0))\n",
      "plt.show()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "display_data",
       "png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEPCAYAAACtCNj2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XlwnPd93/H3YrFY3Ccv8BAJkJQlWYoEkJItB3ZkgaLd\nxB0nJiOncZUmMyZtd5qZNrVEOo1nMtNOTFnueNxpxjpSp62a1pIluZ1xp7ZIKj4g25IoUJJ1iwB4\ngSeOxY3FHk//+D0PdrHEYnfBPZ/9vGYwz7XPPj88wP6++/xOEBERERERERERERERERERKUIPr3Bs\nH9ALHMhTWkREJImKPFzjICbjX063vTxuL7tynxwREUkmH0HhcWAwybH7gXF7fRDYk4f0iIhIEvkI\nCitpBsbittsKlRARESl8UADwFDoBIiJiFDooBIBWe70FGC1gWkREyl5lga7bjAkITwG7MRXNHcDR\nxBc+99xzVmNjY35TJyJS4i5evDjwwAMP7Mj0vHwEhf2YjP+LwN/Z+47Z+07ay15MkHgt8eTGxka6\nu7sTd5elI0eOcPjw4UInoyjoXsSUy72IWhbzoShzoSizociS5VwowmwoytOPf4f7HviXzIWizIei\nzIcjzIcte+nsM8u5sDkvauXvd/BVeKiqrKDK66HKay+XbJt1n9eDvzJ+uwKf10NlhXmdzxtb+uLe\nq8bnpdZXQY3PC7B9NWnMR1B4xv6Jtztu/Ql7eRwRcR0nM58JRZhdiDC9EGFmIcLMgsnUZxdMhj6z\nuD9i77eP2+vz4WjKaw0PT3Gp/1JG6fN6oNrnxW9n0P7KCqrtDNlf6aHa2baX1XGv8S++zrzW7zWv\n89vn+rzOa0zG7a3IXxXq+VWeV6jiIxEpIQuRKFPBCFPB8OJyOhhhKmgy+elghJmFsJ3hR5eszy5E\nyNaX8erKCmrsb8K1S5Zm/Zev1fKZrg3UVFZQ7Ytl4rF17+J2jb3P5y101WpxUVAoIT09PYVOQtHQ\nvYjJ9F7Mh6NMzIWZmA8TmA8RmAszGYwwNR9mws70J+fDTAXt/cEIwTS+pa+kurKC2qoK6nxe6qqW\n/tT6Kqit8lK75FgFtT6vvd9k+NWVqb9p3+n5XXp2tV9XWstd0TcHPXbsmKU6BZHkopbFdDDC+FyI\n8bkw43MhxmbDS7YDc2EC82Gm5sMEI5l/b/d6oN5fSaPfS4O/kga/lwa/l3p/JfVVXur9XurjMvr6\nKi91fu9iEMhnsYkY/f397NmzJ+MbrycFkSIViVoE5sOMzoYYmw0xOhtidCa0uD1mZ/6BuRCZ5PM+\nr4cmfyXNNZU0Vcd+GqorabIz/cZqL43+ysX16soKPB5l7OVAQUGkACJRi9HZEFdnFrg6HWJkZoGr\ns6HF9ZEZk+mn2zKmrspLS43J6NtqfLTU+uxts2ypqaS52qcMXlJSUBDJgYVIlCvTC1yaMj+Xpxe4\nPBXk8vQCV6ZDjKeZ4TdVV9JWW0lrrY82+6c1btlqZ/pVlaoslexQUBBZpelgmAuTC1yYDHJhMsjF\nqSAXJhe4OBlkZDa04rkeoLW2krV1Vayt87G2roo19nJtnY+2OpPxq2WM5JuCgsgKZhYinJ+YZ3gi\nyPmJ4GIAuDAZZDIYSXpehQfW1lWxocH8rG/ws6G+ivUNVayrq6KtzkelKl+lCCkoSNmLRC2uTC9w\nfiLI+Yl5zk0EOReY51xgnrG5cNLz/JUVbGqsor3BT3ujn42NftobqtjY6GdtfZUyfSlJCgpSNizL\nYmQ2xNDYHKfH583P2BxnA/MsJGm+U+X1sLnJz8bGajY1+dnU6GdTkwkArTWVqrAV11FQEFdaiEQ5\nPT7PwOgcg6NzDI3NMTQ+x1SSIp+2Wh+bm/z2TzU3NFezpdnPuvoqKpTxSxlRUJCStxCJcnpsnveu\nzvD+yCynRuc4PTa3bNv9Br+XjpYaOlqr2dZ
aw7aWarY2V1Pv10dBBBQUpASNzoZ4+/IMb12e5u3L\nMwyMzhFKaN/pAbY0+dneVkNnWw2dreanrdanIh+RFSgoSFGzLItzgSBvXJrmrcvTvHV5hktTC9e8\nbnOTnw+treXGNeans63GGT5YRDKgoCBFxbIszk8Eee3CFK9fnOaNi9ME5pe2AKr1VXDTujpuWVfH\nLevruHldHXVVCgAi2aCgIAUXmAvRPzzFq8NTnLwwxcjM0o5frTWV3NZez20b6vnw+jq2tdRogDWR\nHFFQkLwLRy3euTLDiXOTnBie5NTI3JLx9puqK7mjvZ7bNzZwx8Z6NjX6VQ8gkicKCpIXgbkQL5+b\n5NdnJ+kfnmQ2FBuf3+f1cOv6enZvbqB7UwMdrTVqBipSIAoKkjOXpoL0DQXoOz3BO1dmljwNbG7y\nc+fmRnZtbuC32huo1oBuIkVBQUGy6uJUkJ8PBvjZ4DinRucW9/sqPNy+sZ6PbGnirhsaaW/wFzCV\nIpKMgoJct/G5ED8dGOcfB8Z59+rs4v4aXwV3bWmkZ1szd25upFYthESKnoKCrMpCOMovz0xw7NQY\nJ85PLs4NUF1Zwd1bm/hEhwkEGudfpLQoKEhGBkZn+fF7Y7wwMLY4jpDXAx+9oZE9O1q564Ym1Q+I\nlDAFBUlpPhzlZ4Pj/OidEd6LKx7a0VbD3hvbuKezmeYaXwFTKCLZoqAgSV2aCvK/37rK8++PMb1g\nngrqqrz07mjh0ze2sWNNbYFTKCLZpqAg13jnygzP/uYKfacDi3UFN62t5TM3r+ETnS0qHhJxMQUF\nASBqWbx0dpIfvHGZNy/PAKauYM+OFv7g1nXs1FOBSFlQUChzkajFPw6M879eu8S5iSAA9VVefu+m\nNj774bWsqasqcApFJJ8UFMqUEwz+4eQlhidNMFhT52Pfrev4Jx9qU58CkTKloFBmopbFL4YC/LdX\nL3LefjJob6jij7s20LujVZPNi5Q5BYUyYVkWr5yf5O9PXGTAHn6ivaGKL3Rt4F4FAxGxKSiUgTPj\nc3z318P0D08BsKbWxxe6N/CpG9sUDERkCQUFF5sOhnmy/xL/5+2rRC1TgfzHd6znn96yFr+alYrI\nMhQUXMiyLH46OM6jvx5mfC5MhQc+c9Ma/mTXBvU8FpEVKSi4zPDEPP/pxfOcvGCKij68vo5/9bHN\nbG9TPwMRSU1BwSUiUYsfvnmF//rqRRYiFg1+Lwfu2sTeG1s1i5mIpE1BwQXOT8zzrZ+d5e0rpify\nfTtbOfiRTTRV688rIpnJR66xDwgAncATqzguSViWxY/eGeHxl4YJRizaan38m49v4a4tTYVOmoiU\nqFwHhW57eRyT6XcBJ+OOdwGDcfsSj0sSgbkQ3/7FOX51dgIwYxR95e7NNPj1dCAiq5frHOR+4Hl7\nfRDYw7WZ/sPAXkzQOJ7j9LjCyeEpHv7pacbmwtRXefnXPVv4RGdLoZMlIi6Q66DQDIzFbbclHD8J\nDNmvOZDjtJQ8y7J4+o0r/P2JC0QtuG1DPYfu2cq6eg1aJyLZkY+yhpWavjQDpzAB4QmgHxMkJMHs\nQoRv/fwsfacDAHyhawP/vGsDXvVIFpEsynVQCACt9noLMJpw/ADwGDBpv3Y/8Ejimxw5cmRxvaen\nh56enlyktWhdnAzy9ecHORuYp9ZXwaF7tnH3VlUmi0hMX18ffX19i9t79+5d1fvkOig8BezG1BV0\nAEft/c2YIAAmIECsMvoahw8fzmESi9tbl6b562NDTMyH2dpczV/f18GmpupCJ0tEikziF+b+/v5V\nvU+ug8JJTFDoxQSB1+z9x+z9jwAPYiqhW1GT1CVeODXGf/z5WUJRi92bG/h393ZQp3kORCSH8lGn\n4GT08S2LdsetX1NcJPDMG5d5/OULAHz2ljV8+aObVX8gIjmnRu1FxrIs/uHkJf57/yUAvvLRTfzB\nresKnCo
RKRcKCkXEsiy+d+IiT71+mQoP/MXHb2DvjYmteEVEckdBoUhYlsVjLw3z3JtX8Xrg8Ce3\n8TvqkCYieaagUAQsy+K/vHKB5968SmWFh7/q3cbHtjYXOlkiUoY0/VYReLL/Ek+/cQWvBwUEESko\nBYUC+/7rl/gfJy9R4YGvfVIBQUQKS0GhgP7fe6N875WLeICvfmKrBrUTkYJTUCiQl89N8J2+swD8\n+W9vYc/O1hRniIjknoJCAbw/Mst/OH6aqAV/dPt6PnPzmkInSUQEUFDIu8tTC3z9JwPMh6P07mjh\nz3a3FzpJIiKLFBTyaCES5d8fH2J8LswdG+v5i4/fgMejoStEpHgoKOTRo78a5v2RWdbXV/FX93bg\n8+r2i0hxSbfzWhdm2szE2lALuDOrKXKpYx+M8aN3R/BVePh6bweN1eo3KCLFJ92c6QeY4a4PsfJM\narKMobE5vvPiOQC+cvdmblxbW+AUiYgsL92g0Ap8OZcJcatgOMrfvHCaYDjKnh0t/N5NGuBORIpX\nuoXa3wC+msuEuNX3TlzgTGCezU1+/vy3t6hiWUSKWrpPCn+EqVf4S2Asbr8F7Mx2otzi5PAUP3RG\nPb1nGzU+zZomIsUt3aDwhzlNhQtNB8M88vMzAHyhu131CCJSEtINCoM5TYUL/edfnmdkJsRNa2v5\nZ7evL3RyRETSkm6dQhPwKKboKGovvws05ihdJe2lsxO8MDCOv7KCh+7ZqrmVRaRkpBsUnsC0QNpl\nn9MJrLH3S5xgOMrf/uo8AP+iewObm6oLnCIRkfSlW3y0H2gBJuztAHAA88Tw+Rykq2Q99fplLk0t\n0NFSze/fuq7QyRERyUi6TwpDmKeEeLtQXcMSwxPzPPX6ZcAMh12pYiMRKTHpPikcwvRofgwTCLYD\nB1GrpEWWZfG3vzpPKGpx385Wbt1QX+gkiYhkLN0nhWeAHZgnhjZgwN5+NkfpKjkvnp7gxPkp6qu8\nfPGujYVOjojIqmQyKtsg8M1cJaSURaIW3ztxAYA/3d1OS42vwCkSEVmdVEHhBKbo6OEkxzVKKnD0\ngzHOTwRpb6jid2/SLGoiUrpSBYWDmKKiAyw/OqqV9RSVmIVwlCf7LwLwJ7vaVbksIiUtVVDot5fH\nMHUJiSJAWQ/o83/fHeHqTIiOlmo+ub2l0MkREbkuqYJCNMm645kspqXkzC5E+J+vmSaof7p7IxUa\nAVVESlyqoOC0TjoK3JfjtJSc5966ysR8mFvW1fHRGzTih4iUvnSbpN4HfA64w97eB3wxJykqEXOh\nCD988wpgWhxpngQRcYN0g8KjmOaoTs43BHwNMyheWfrJ+2NMBSPcsq6OOzY2FDo5IiJZkW5QuB/z\ntHDS3u63t7+Ui0QVu0jU4jn7KWH/bRrfSETcI92gMI4ZPjteE2ZgvLLz4pkAl6YW2NhYxd1bE2+L\niEjpymTsoxe4duyjQzlKV9GyLItn3jBPCZ+7dZ3mShARV0k3KDyDKTLajxkdNWAvh9I4d5/9+k6W\nn3+hG+jAzNdQ9PMzvH15hnevztLg93LfztZCJ0dEJKvSLT6C2NhHXwYOYwLCvSnO6baXx+1l1zKv\nOYwZWK85yfGi8sxvzFPCZ25eQ42vrPvtiYgLpfuk0IGpVHYK0D2Yb/Z77GUy9wPP2+uD9utPxh3f\nD7xirz+SZloK5uJkkF+emcBX4eGzt6wtdHJERLIu3SeFH2CKfyow9Qn99naq+RSaMbOzORKHytht\n7+sCHkwzLQXz4/dHsYBPdDbTWquRUEXEfdJ9UugGejHTcT4PPA48jenpnGqU1FQ1sSOYp4c9mPqH\na+ZoOHLkyOJ6T08PPT09aSY7eyJRi6Pvm/j26RuXGwZKRKRw+vr66OvrW9zeu3fvqt4n3aAQALYB\nr2OKixrtfdvTOM8pXmoBRhOOjxKrrA5gAsw1QeHw4cNpJjN3Xh2eZGQ2x
MbGKn6rXbOqiUhxSfzC\n3N/fv8Krk0u3+Ogw5tt8E6bS+AXME8OJFOc9hSlmAlMvcdReb7aXz8QdbwZeTjM9effj98xTwqdu\nbNOQFiLiWukGhccx3/gnMH0TjmDqGVI9nziVyr2YJ4HX7O1j9nLI3r/Pfv/n0kxPXgXmQvz67AQV\nHtQMVURcLd3ioxOYlkJOD+ZMhsx2+h4cj9u3e5njRTvf8/FT44SjFndtaWRNXVWhkyMikjPpPikc\npwx7L4Ppwfzj901ViCqYRcTtMm199CVMfwOHBezMdqKKybtXZzkzPk9TdSUf0ZwJIuJy6QaFg6Ru\nWupKx0+ZCuY9O1rweTPpAC4iUnpSBYUIpinpCZafo9nVopZF32lTjfLJ7apgFhH3SxUUvkWscnm5\nOZotwLUDAL1zeYax2TDr66vYuaam0MkREcm5VOUhh+zXHLeXiT+uDQgAv7CfEn57W5P6JohIWchk\njuayYlkWL56eAODj25pTvFpExB1Uc5rEByNzXJ5eoLW2kpvX1xU6OSIieaGgkIRTdNSzrZkKFR2J\nSJlQUFiGZVn8YigWFEREykW6QaEJM1T2GGZk0y7MgHiu7M01NDbPhckgTdWV3LZBI6KKSPnIZJKd\nMcygdR7MQHdD9n7XcfomfGxrE94KFR2JSPlIt0fzHmLDXTsOsXRWNdd48bSKjkSkPKX7pDAE7ErY\nt4ul4yC5wsjMAkPj81RXVnDHRhUdiUh5SfdJ4UuYoa2PYZ4YnsY8PaSao7nk9A9PAXB7e73GOhKR\nspNuUNgP/BmwDvN0MIAZJC+w0kml6FU7KHRvaihwSkRE8i/doDABfB0zdebTwCu4MCBELWvxSWHX\nZlc2rBIRWVG65SOHMLOldQL9wDcxTVO/m6N0FcTg6BwT82HW1vnY0uQvdHJERPIu00LzAKbo6KR9\nrqvGRHKKjnZtatQAeCJSltINCgcwxUZR4GFgBDMb244cpasgXh2eBGDXZtUniEh5SrdO4X5MUHBl\n5TLAXCjCW5dm8ABdGxUURKQ8pRsUXFVMtJzfXJomFLW4cU0tjdXp3hYREXdJlfudwFQyP5zkuAXc\nmdUUFchiqyM1RRWRMpYqKBzEVCwfwIx5lMjKeooKZLGSWfUJIlLGUgWFfnt5DGhb5ngEF0zJOTKz\nwBl7aIub12lCHREpX6mCQjTJuuOZLKalYH5zaRqA2zZoaAsRKW+pgoKTQx7FxZXNb16aAeDWDXpK\nEJHylu7X4mQB4d5sJaSQ3rpsgsKH12tUVBEpb+m2vezAjJQaP8FAK2ak1NZsJyqfZhYiDI3NUVnh\n4UNrawudHBGRgspk5rVOTAskZ/yjTlwwdPY7V2awgJ1ravBXqj5BRMpbuk8K3UAvZrTU54HHMT2c\nj1Li/RTetCuZVXQkIpL+k0IA2GavtwKN9r7tOUhTXsXqE1TJLCKS7pPCYczIqC3AceAFzPzMJ3KU\nrrwIRy3evaKgICLiSDcoOMVFE5hhL/ZjAsQTOUpXXgyMzhKMWGxu8tNc4yt0ckRECi6Tkd/iR0fN\npNPaPvvcTlYOIg8Cj2TwvtdNRUciIkulCgqnUhy3gJ0rHO+2l8cxQaELUwyVaA+mL0Reg4LTaU2V\nzCIiRqqgcP91vv/9mNZKAIOYzH+5oJD3gfUsy+Lty6blkXoyi4gY6Q6It1rNmAppx3KD6nVhniQO\nXee1MnJxaoGxuTBN1ZVsatR8zCIikH6dwvNJ9lvAp1Kcm2qy44L0iH7rstM/oU7zMYuI2NINCt9M\n2G4Gvgb8TYrzAsQy/RZgNOG485SQd2+rkllE5BrpBoVjSfa9Ajy7wnlPAbsxGX8Hpgc0mKDitEjq\nxBQrtZKkIvrIkSOL6z09PfT09KSZ7OQ+GJkD0HhHIuIKfX199PX1LW7v3bt3Ve9zPZMRt5C6R/NJ\nTFDoxQSB1+z9x+z9TkA5ADSRpML58
OHD15HMa4UiUYbGTFDY3qagICKlL/ELc3//6qqE0w0Ky/Vc\n7sZ0akvF6ZsQX0y0e5nX5K0j3NnAPKGoxaZGP3VVJT9xnIhI1qQbFA5ivsU3xu0LAENZT1EevG8X\nHe1cU1PglIiIFJd0g0Iv8PAy+y1KcI7mD0ZmAdixRkVHIiLx0h0l9WFMj+OKhJ+SCwgQCwo7FRRE\nRJZINygMYVoalbxw1GLQrmTe2abiIxGReOkWH/0hcBp4jKU9lC3gW1lOU06dHZ8nFLFob6ii3n89\nja9ERNwn3VzxCDCOaYZa0t1/PxhV0ZGISDLpBoXdmM5nEzlMS16oPkFEJLl06xSexrRAKnmn1BxV\nRCSpdJ8U7sT0VQhwbZ3CSvMpFJVI1GLALj7aoZ7MIiLXyKSiueSdDcwTjFisr6+isVqVzCIiidLN\nGQdzmoo8UX2CiMjK8jGfQtE4Nar6BBGRleR6PoWioicFEZGV5Xo+haIRtSwG7CeFHerJLCKyrHSb\npC4nnfkUisblqQXmw1FaaypprvEVOjkiIkUpH/MpFIWhcfOUsK1VTwkiIslkMp9CogAl1CppaGwe\ngI6W6gKnRESkeKUbFFY3r1sROW2PjNqhJwURkaTSqVPYBzwVt30QOAV8MScpypGhcfOksK1FQUFE\nJJlUTwoHMBPsHIrb5wQIp5nq32U7Udm2EIlyfmIeD3CDio9ERJJKFRQOYYa4OB63bwJTwTwGfIMS\nCArnAvNELdjU6Ke68noaXImIuFuqHLKT5DOuHaNEmqQuVjK36ilBRGQlqYLCEGYuheXsokRaH512\nmqOqPkFEZEWpgsLDmKKibQn7OzBTcz6WgzRlXexJQUFBRGQlqeoUHseMczSIaZY6BrRiOq4dAh7J\naeqyxOm4puIjEZGVpdNP4ZuY4LAbU8cwALyK6bxW9KaCYUZmQvi9Htob/IVOjohIUUu381qA5QfF\nK3pn7P4JN7RU463wFDg1IiLFzfXtM4ecnsyqZBYRScn9QcHpyaxKZhGRlFwfFJwxj7apJ7OISEqu\nDgqWZS0+Kag5qohIaq4OCldnQswsRGj0e2mtSbdOXUSkfLk6KJwejw2X7fGo5ZGISCouDwoaLltE\nJBPuDgpOJbN6MouIpMXdQcGpZNaTgohIWlwbFCJRizMBExS2qjmqiEha8tEkZx9mmIxO4Illjh+w\nl9uBw9m66IXJIKGIxbp6H3VV3my9rYiIq+X6SaHbXjozt3UlHO/FjKn0BCZo9GbrwqpkFhHJXK6D\nwv3AuL0+COxJON4Zt2/Q3s6K2JhHKjoSEUlXrouPmjFzMDjaEo7HFyd1A9/P1oWdJ4WtelIQEUlb\nPiqa0+k11o2Zo+G1bF30tCbWERHJWK6fFAKYmdoAWoDRJK/rBb6W7E2OHDmyuN7T00NPT8+KFw2G\no1yYDFLhgS1NCgoi4n59fX309fUtbu/du3dV75ProPAUZsa245h5nY/a+5uJzdx2kNi0nr3EKqUX\nHT6cWaOkc4F5ohZsafJTVenaVrciIosSvzD39/ev6n1ynWOetJe9mCDgFA85s7jtAY4ApzB1D1Y2\nLnpacyiIiKxKPvopOJXJ8U8Au+3lMWLFS1mjlkciIqvjyrIV9VEQEVkdlwYFDYQnIrIargsK08Ew\nV2dCVHk9tDf4C50cEZGS4rqgcMYuOrqhuRpvhSbWERHJhOuCwpBaHomIrJrrgsJifYJaHomIZMx1\nQWFg1ASFTj0piIhkzFVBIWpZDNp9FLa3KSiIiGTKVUHhwmSQuVCUtlofLTW+QidHRKTkuCooOEVH\nO/SUICKyKq4KCqdGVXQkInI9XBUUBkZnAdjRVlvglIiIlCbXBAXLsjg1Yj8prNGTgojIargmKIzN\nhgnMh6mv8rKhvqrQyRERKUmuCQoDY6boaHtbDR6PhrcQEVkN1wQFp+ioU5XMIiKr5p6goOaoIiLX\nz
TVBQS2PRESunyuCwsxChItTC/i8HrY0ayA8EZHVckVQcHoyd7TUUKk5FEREVs0lQSHW8khERFbP\nFUFBlcwiItnhiqAQe1JQJbOIyPUo+aAwHQxzenyeCg90tKqSWUTkepR8UOgfniJqwa3r66nxeQud\nHBGRklbyQeGlc5MA3LWlscApEREpfSUdFKKWxSt2ULhTQUFE5LqVdFD4YGSWwHyYdfU+trWoPkFE\n5HqVdFB42Sk62tykkVFFRLLAFUFBRUciItlRskFhfDbEe1dn8Xk93LGxvtDJERFxhZINCieGzVPC\n7e1qiioiki0lGxRePus0RW0qcEpERNyjJINCJGpxYngKgDs3qz5BRCRbSjIovHRugpmFCJub/Gxq\n8hc6OSIirlGZh2vsAwJAJ/DEKo4vMT4b4jt95wD49IfaspdKERHJ+ZNCt708bi+7Mjy+RNSyeOTn\nZxifC3N7ez37bl2XpWSWhr6+vkInoWjoXsToXsToXly/XAeF+4Fxe30Q2JPh8SWe/c0VTpyfotHv\n5dA9W/GW2Sxr+oeP0b2I0b2I0b24frkOCs3AWNx2YnlPquOL3rs6w/deuQDAv/3EVtbUVWUrjSIi\nYstHRXOqr/Mpv+5blsW3f3GOiAW//+G13L1VzVBFRHIh1+UvR4CjmDqD/UAH8EgGx3nyySdPtbe3\nb89xOkVEXOXixYsDDzzwwI5CpyNRF3DAXn8QuMNeb05xXERECiDXxUcn7WUvptnpa/b2sRTHRURE\nRFJ6MG59H+YLxYEkrxUpBw8nbC/3uUj7s1LsPZrL/UN/wP45ErevnO/JHuA+ez2jPi4u0435P1jV\nh95lrisDdIGDmN/XsdznIqPPSjEHhXL+0IP5pz6G6eXdaW8796Bc74kVt/55Mujj4jKHgWcxdXMZ\nf+hdpAvztz9uL7sov8/I45jf3bFc36/7McXz8fuSKuagkFHHNhfqJPY7D9rbnyeDP67LdBH7oAM0\nkWYfF5fZD7xirz+CqZcr58+KU3TSibkX5fxlAZbv+5V2fzAo7qCQ0S/iQk8QGwuqGziBuSejca8p\np3vSusy+8urSbuzG/N27iNWvlOtn5SQwhPndnd+/XO9FvOv6XBRzUIDy/NAn6gZeJdZSqxzvSeJT\nApgnJidQtLA0WLrdCLH/B6c8uRz/L5qBU5i6gycw/ZygPO+FI/5z4XyJzOizko9RUlernD/08XqB\nr9nr5XpPOu2fNszv3wU8hfnWfByTGRwtWOryaxTz7RjM/8OdlO//xQHgMWAScw/2U773whH/uejE\nfC48ZPD8iFO1AAAERElEQVRZKeYnhacwvxSU14c+3kFiPbx7Kd978qz9Y2HqEizKt4/LM8T+B5qB\nlynf/wswAQFMhheg/O7FfkyG/0V7e7nPhas+Kwcon6ZlifZgykZP2ct77f3lfE/EOIApNvpGwr5y\n/L94kGub55brvRAREREREREREREREREREREREREREREpRq8C0WV+IpiexCdyeO1oDt8710o57QAP\nkf0RRRP7D4hIiWokNgrpvfa2s683h9ct5Yy1lNPeTO6C/akcva+IFMAYS+fNdkZrBTOUwKvAo/br\nnrePD2AyyPgZ0vbE7X8eE1wSHbWPO2PV7I875+kk58S/bsxOS+L+xPOd8WAetn+XjoTtVOld7nqJ\naY930H5tNO56Tjpexdwnp+d6/Df1x4iN/vmQfa4TkB9iaRAawATvZOlO/J0TPURsmITEa6f7d0z2\ndzjC0klgRKSEpQoKUeBzmMzByRAaMZmXk2k5Qxl/0j72KCaTXo5zjvPe99rv/TRLMxriXueksQOT\nsX4uxfnOse8C25bZXim9ya4Xn/Z4zfb+2+PS4cyi51z3q/b2o5iMFkwGe8q+fpf9um/HnXsUU5y3\nLe7azpNdsnTH/46JTsTtX+7ajddxX3pJ/vcWkRKTKijEj1v/KEv
H4nEyk4NcmymMsTwnY30Ik4E5\nOpKc81DCNe/AZGQrne9kkCTZXim9idfrInZ/khUfxX+bfoylwSn+d4qvr9nH0icHp9jOOT6O+f0O\nYL69n7DXk6U78XdMFH9suWtD5vfFOT/x95QiV8xDZ0vxi/+wB7j2w+8BtmO+fcYfs1hZK0unGBzC\nfFNN5BTBOJzRHz+f4vz4Y4nbK6U38XonSe0vMRm6837x14q/RvwcAM9i7sEP7OU3MKPldmMC3ABm\nqtbPY2ZhO5Yi3YnXjddMbDa/la692vsyyPJ/OylSxTx0tpQ+C/Ot8xlMBuP8pJoicRSTCTkSMy5H\nIOF1zoT26Z6/nJXSm+x6yezHBIR7gb32+6ajE5PR7wB2AV+yr9MPHMIUHx2zj90HfB8TKDK9z5lc\nO5v3RURKVKrio/iWJUdYWimZWBbda68/RqzsPFFinYJzzg9YWhzkcIqFuuxzBjAVph0rnJ+Y7sRt\np2x+ufQmu1582uMdiDvXaeETXw4ff934e+tULDfFve5zmHvs1OOAKUZyKrdXus+J10oUn/Zk117t\nfVHxkYiLLBcUnEnjO4EP4o4dIVZpCqYitNFe78VkLlHgJ3H7Ez1tnwexb6dRzMQpyc45QKx1T3zg\nSHZ+YroTt1OlN9n14tMe73n79a8QK0a6d5nrxt9b57yo/XqnvH4PS+/r03b6UqV7ud8xXnyrqGTX\nXun9Ifl92YO5/yIiUiIeJHedzB4m9mQjIiIloAl1XhObt9AJEJGCC2ICwxxwKYvvuw/TKqk/i+8p\nIiIiIiIiIiIiIiIiIiIiIiIiRen/A/RnwxcA8Cf2AAAAAElFTkSuQmCC\n",
       "text": [
        "<matplotlib.figure.Figure at 0x11eff0790>"
       ]
      }
     ],
     "prompt_number": 7
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The histogram shows visually what the summary statistics only hint at. The distribution is heavily weighted towards students who solve problems in under 20 seconds. The cumulative distribution function (CDF) shows that roughly 80% of successful students solve the problem within 20 seconds. After 40 seconds, 90% of successful students have finished the problem. Almost no students take longer than 80 seconds."
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "### Completion time by problem\n",
      "\n",
      "OK, let's ask a slightly harder question: how are students doing problem by problem? The answer will take several parts.\n",
      "\n",
      "First, let's get the number of unique problems:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# 'Problem Name' uniquely identifies a problem, so pull out that one column\n",
      "problems = traindata.loc[:, 'Problem Name']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 8
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Reduce the column to its distinct problem names (np.unique also sorts them)\n",
      "problems = np.unique(problems)\n",
      "# Single-argument print(...) emits the same text on Python 2 and Python 3,\n",
      "# whereas the bare print statement is a SyntaxError on a Python 3 kernel.\n",
      "print('Number of unique problems:  %d' % len(problems))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Number of unique problems:  1084\n"
       ]
      }
     ],
     "prompt_number": 9
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Median 'Correct Step Duration (sec)' for every problem, keyed by problem name.\n",
      "# A single groupby pass replaces one boolean-mask scan of the whole frame per\n",
      "# problem (about a thousand scans over ~800k rows) and yields the same medians.\n",
      "# NOTE(review): groupby skips rows whose 'Problem Name' is missing -- confirm\n",
      "# the column has no missing values.\n",
      "pmedian_times = (\n",
      "    traindata.groupby('Problem Name')['Correct Step Duration (sec)']\n",
      "    .median()\n",
      "    .to_dict()\n",
      ")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 10
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import operator"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Rank problems from slowest to fastest median completion time, producing a\n",
      "# list of (problem name, median seconds) pairs.\n",
      "# A problem with no correct-step durations has a NaN median, and NaN compares\n",
      "# False against everything, which leaves sorted() with an unreliable order.\n",
      "# The (is-a-number, value) key keeps real medians in descending order and\n",
      "# pushes NaN entries to the end. items() runs on Python 2 and Python 3 alike.\n",
      "sorted_times = sorted(\n",
      "    pmedian_times.items(),\n",
      "    key=lambda name_time: (not np.isnan(name_time[1]), name_time[1]),\n",
      "    reverse=True\n",
      ")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 12
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "traindata.columns"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 13,
       "text": [
        "Index([u'Row', u'Anon Student Id', u'Problem Hierarchy', u'Problem Name', u'Problem View', u'Step Name', u'Step Start Time', u'First Transaction Time', u'Correct Transaction Time', u'Step End Time', u'Step Duration (sec)', u'Correct Step Duration (sec)', u'Error Step Duration (sec)', u'Correct First Attempt', u'Incorrects', u'Hints', u'Corrects', u'KC(Default)', u'Opportunity(Default)'], dtype='object')"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "traindata['Step Name']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 14,
       "text": [
        "0        3(x+2) = 15\n",
        "1            x+2 = 5\n",
        "2          2-8y = -4\n",
        "3           -8y = -6\n",
        "4         -7y-5 = -4\n",
        "5            -7y = 1\n",
        "6           7y+4 = 7\n",
        "7             7y = 3\n",
        "8         -5+9y = -6\n",
        "9            9y = -1\n",
        "10        -7-3x = -2\n",
        "11    -7-3x+7 = -2+7\n",
        "12           -3x = 5\n",
        "13     -3x/-3 = 5/-3\n",
        "14         -9 = 8y+9\n",
        "...\n",
        "809679             -4x = 5\n",
        "809680       -4x/-4 = 5/-4\n",
        "809681            x = 5/-4\n",
        "809682          0 = -1y-10\n",
        "809683    0+10 = -1y-10+10\n",
        "809684      10 = -1y-10+10\n",
        "809685            10 = -1y\n",
        "809686      10/-1 = -1y/-1\n",
        "809687        -10 = -1y/-1\n",
        "809688           -7+2x = 4\n",
        "809689       -7+2x+7 = 4+7\n",
        "809690            2x = 4+7\n",
        "809691             2x = 11\n",
        "809692         2x/2 = 11/2\n",
        "809693           -2+5x = 8\n",
        "Name: Step Name, Length: 809694, dtype: object"
       ]
      }
     ],
     "prompt_number": 14
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "traindata.columns"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 16,
       "text": [
        "Index([u'Row', u'Anon Student Id', u'Problem Hierarchy', u'Problem Name', u'Problem View', u'Step Name', u'Step Start Time', u'First Transaction Time', u'Correct Transaction Time', u'Step End Time', u'Step Duration (sec)', u'Correct Step Duration (sec)', u'Error Step Duration (sec)', u'Correct First Attempt', u'Incorrects', u'Hints', u'Corrects', u'KC(Default)', u'Opportunity(Default)'], dtype='object')"
       ]
      }
     ],
     "prompt_number": 16
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import sklearn"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 19
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "sklearn.__version__"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 20,
       "text": [
        "'0.14.1'"
       ]
      }
     ],
     "prompt_number": 20
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}