{ "metadata": { "name": "", "signature": "sha256:4a195ae44b4a73879282646f8181c8e44c202ec3d98599c5dbd34984a45547a7" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Education Data Mining Testing System\n", "\n", "We need an in-house testing system to validate our machine learning algorithm. We need this in order to iterate towards better solutions. I am basing this in-house testing system on the Yu et al. JMLR Workshop and Conference Proceedings paper that the winning team submitted. The [leaderboard](https://pslcdatashop.web.cmu.edu/KDDCup/results_full.jsp) contains the full list of submissions and links to papers.\n", "\n", "In the Yu et al. paper, the main reason why they built their own testing system instead of just submitting to their answers and having the KDD Cup server score it was to avoid overfitting the solution." ] }, { "cell_type": "code", "collapsed": false, "input": [ "%matplotlib inline\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import sklearn\n", "from sklearn.cross_validation import cross_val_score" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 137 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Feature engineering" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Get the data: Algebra 2005-2006 (A56) and/or Algebra 2008-2009 (A89)\n", "a56_train_filepath = 'data/algebra0506/algebra_2005_2006_train.txt'\n", "#a89_train_filepath = 'data/algebra0809/algebra_2008_2009_train.txt'\n", "\n", "a56data = pd.read_table(a56_train_filepath)\n", "#a89data = pd.read_table(a89_train_filepath)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 104 }, { "cell_type": "code", "collapsed": false, "input": [ "hierarchy = a56data['Problem Hierarchy']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 105 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Split the problem hierarchy into 'Units' and 'Sections'" ] }, { "cell_type": "code", "collapsed": false, "input": [ "units, sections = [], []\n", "for i in range(len(hierarchy)):\n", " units.append(hierarchy[i].split(',')[0].strip())\n", " sections.append(hierarchy[i].split(',')[1].strip())" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 106 }, { "cell_type": "code", "collapsed": false, "input": [ "# Now add 'Units' and 'Sections' as columns within the dataframe\n", "a56data['Problem Unit'] = pd.Series(units, index=a56data.index)\n", "a56data['Problem Section'] = pd.Series(sections, index=a56data.index)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 107 }, { "cell_type": "code", "collapsed": false, "input": [ "# Rearrange order of columns\n", "cols = a56data.columns.tolist()\n", "cols = cols[0:3]+cols[-2::]+cols[3:-2]\n", "a56data = a56data[cols]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 108 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Create a temporary dataframe for the addition of new binary features" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df = a56data" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 109 }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Map string category values to integers" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cats = ['Anon Student Id', 'Problem Hierarchy', 'Problem Unit', 'Problem Section', 'Problem Name']\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 110 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Student IDs" ] }, { "cell_type": "code", "collapsed": false, "input": [ "sids = list(set(df['Anon Student Id']))\n", "sid_dict = {}\n", "for idx,sid in enumerate(sids):\n", " sid_dict[idx] = sid\n", " df.loc[df['Anon Student Id'] == sid,'Anon Student Id'] = idx" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 111 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Problem Hierarchy" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cat = 'Problem Hierarchy'\n", "prhs = list(set(df[cat]))\n", "prh_dict = {}\n", "for idx,prh in enumerate(prhs):\n", " prh_dict[idx] = prh\n", " df.loc[df[cat] == prh,cat] = idx" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 112 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Problem Unit" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cat = 'Problem Unit'\n", "prus = list(set(df[cat]))\n", "pru_dict = {}\n", "for idx,pru in enumerate(prus):\n", " pru_dict[idx] = pru\n", " df.loc[df[cat] == pru,cat] = idx" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 113 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Problem Section" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cat = 'Problem Section'\n", "prss = list(set(df[cat]))\n", "prs_dict = {}\n", "for idx,prs in enumerate(prss):\n", " prs_dict[idx] = prs\n", " df.loc[df[cat] == prs,cat] = idx" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 114 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Problem Name" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cat = 'Problem Name'\n", "prns = list(set(df[cat]))\n", "prn_dict = {}\n", "for idx,prn in enumerate(prns):\n", " prn_dict[idx] = prn\n", " df.loc[df[cat] == prn,cat] = idx" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 115 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Step Name" ] }, { "cell_type": "code", "collapsed": false, "input": [ "cat = 'Step Name'\n", "stns = list(set(df[cat]))\n", "stn_dict = {}\n", "for idx,stn in enumerate(stns):\n", " stn_dict[idx] = stn\n", " df.loc[df[cat] == stn,cat] = idx" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mstn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mstn_dict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mstn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcat\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mstn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mcat\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0midx\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, value)\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_to_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mis_setter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 98\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_setitem_with_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 99\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 100\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_has_valid_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/pandas/core/indexing.pyc\u001b[0m in \u001b[0;36m_setitem_with_indexer\u001b[0;34m(self, indexer, value)\u001b[0m\n\u001b[1;32m 404\u001b[0m \u001b[0;31m# scalar\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 405\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mitem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 406\u001b[0;31m \u001b[0msetter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 407\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 408\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "prompt_number": 160 }, { "cell_type": "code", "collapsed": false, "input": [ "df" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RowAnon Student IdProblem HierarchyProblem UnitProblem SectionProblem NameProblem ViewStep NameStep Start TimeFirst Transaction TimeCorrect Transaction TimeStep End TimeStep Duration (sec)Correct Step Duration (sec)Error Step Duration (sec)Correct First AttemptIncorrectsHintsCorrectsKC(Default)
0 1 104 87 3 17 1046 1 3(x+2) = 15 2005-09-09 12:24:35.0 2005-09-09 12:24:49.0 2005-09-09 12:25:15.0 2005-09-09 12:25:15.0 40 NaN 40 0 2 3 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
1 2 104 87 3 17 1046 1 x+2 = 5 2005-09-09 12:25:15.0 2005-09-09 12:25:31.0 2005-09-09 12:25:31.0 2005-09-09 12:25:31.0 16 16 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
2 3 104 87 3 17 346 1 2-8y = -4 2005-09-09 12:25:36.0 2005-09-09 12:25:43.0 2005-09-09 12:26:12.0 2005-09-09 12:26:12.0 36 NaN 36 0 2 3 1 [SkillRule: Remove constant; {ax+b=c, positive......
3 4 104 87 3 17 346 1 -8y = -6 2005-09-09 12:26:12.0 2005-09-09 12:26:34.0 2005-09-09 12:26:34.0 2005-09-09 12:26:34.0 22 22 NaN 1 0 0 1 [SkillRule: Remove coefficient; {ax+b=c, divid......
4 5 104 87 3 17 346 2 -7y-5 = -4 2005-09-09 12:26:38.0 2005-09-09 12:28:36.0 2005-09-09 12:28:36.0 2005-09-09 12:28:36.0 118 118 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
5 6 104 87 3 17 346 2 -7y = 1 2005-09-09 12:28:36.0 2005-09-09 12:28:43.0 2005-09-09 12:28:51.0 2005-09-09 12:28:51.0 15 NaN 15 0 1 0 1 [SkillRule: Remove coefficient; {ax+b=c, divid......
6 7 104 87 3 17 346 3 7y+4 = 7 2005-09-09 12:28:57.0 2005-09-09 12:29:09.0 2005-09-09 12:29:09.0 2005-09-09 12:29:09.0 12 12 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
7 8 104 87 3 17 346 3 7y = 3 2005-09-09 12:29:09.0 2005-09-09 12:29:14.0 2005-09-09 12:29:14.0 2005-09-09 12:29:14.0 5 5 NaN 1 0 0 1 [SkillRule: Remove positive coefficient; {ax/b......
8 9 104 87 3 17 346 4 -5+9y = -6 2005-09-09 12:29:19.0 2005-09-09 12:29:31.0 2005-09-09 12:29:31.0 2005-09-09 12:29:31.0 12 12 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
9 10 104 87 3 17 346 4 9y = -1 2005-09-09 12:29:31.0 2005-09-09 12:29:36.0 2005-09-09 12:29:36.0 2005-09-09 12:29:36.0 5 5 NaN 1 0 0 1 [SkillRule: Remove positive coefficient; {ax/b......
10 11 104 90 3 15 346 1 -7-3x = -2 2005-09-09 12:29:41.0 2005-09-09 12:30:27.0 2005-09-09 12:30:27.0 2005-09-09 12:30:27.0 46 46 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
11 12 104 90 3 15 346 1 -7-3x+7 = -2+7 2005-09-09 12:30:27.0 2005-09-09 12:30:34.0 2005-09-09 12:30:45.0 2005-09-09 12:30:49.0 22 NaN 22 0 1 0 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol......
12 13 104 90 3 15 346 1 -3x = 5 2005-09-09 12:30:49.0 2005-09-09 12:31:04.0 2005-09-09 12:31:04.0 2005-09-09 12:31:04.0 15 15 NaN 1 0 0 1 [SkillRule: Remove coefficient; {ax+b=c, divid......
13 14 104 90 3 15 346 1 -3x/-3 = 5/-3 2005-09-09 12:31:04.0 2005-09-09 12:31:07.0 2005-09-09 12:31:07.0 2005-09-09 12:31:12.0 8 8 NaN 1 0 0 2 [SkillRule: Multiply/Divide; [Typein Skill: {R......
14 15 104 90 3 15 346 2 -9 = 8y+9 2005-09-09 12:31:16.0 2005-09-09 12:31:29.0 2005-09-09 12:31:29.0 2005-09-09 12:31:29.0 13 13 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
15 16 104 90 3 15 346 2 -9-9 = 8y+9-9 2005-09-09 12:31:29.0 2005-09-09 12:31:32.0 2005-09-09 12:31:32.0 2005-09-09 12:31:39.0 10 10 NaN 1 0 0 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol......
16 17 104 90 3 15 346 2 -18 = 8y 2005-09-09 12:31:39.0 2005-09-09 12:31:44.0 2005-09-09 12:31:44.0 2005-09-09 12:31:44.0 5 5 NaN 1 0 0 1 [SkillRule: Remove positive coefficient; {ax/b......
17 18 104 90 3 15 346 2 -18/8 = 8y/8 2005-09-09 12:31:44.0 2005-09-09 12:31:46.0 2005-09-09 12:31:46.0 2005-09-09 12:32:56.0 72 72 NaN 1 2 0 2 [SkillRule: Multiply/Divide; [Typein Skill: {R......
18 19 104 90 3 15 346 3 -2-2x = 9 2005-09-09 12:33:01.0 2005-09-09 12:33:22.0 2005-09-09 12:33:32.0 2005-09-09 12:33:32.0 31 NaN 31 0 1 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
19 20 104 90 3 15 346 3 -2-2x+2 = 9+2 2005-09-09 12:33:32.0 2005-09-09 12:33:37.0 2005-09-09 12:33:37.0 2005-09-09 12:33:40.0 8 8 NaN 1 0 0 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol......
20 21 104 90 3 15 346 3 -2x = 11 2005-09-09 12:33:40.0 2005-09-09 12:33:46.0 2005-09-09 12:33:46.0 2005-09-09 12:33:46.0 6 6 NaN 1 0 0 1 [SkillRule: Remove coefficient; {ax+b=c, divid......
21 22 104 90 3 15 346 3 -2x/-2 = 11/-2 2005-09-09 12:33:46.0 2005-09-09 12:33:51.0 2005-09-09 12:33:51.0 2005-09-09 12:33:55.0 9 9 NaN 1 0 0 2 [SkillRule: Multiply/Divide; [Typein Skill: {R......
22 23 104 90 3 15 346 4 4+4y = -6 2005-09-09 12:33:59.0 2005-09-09 12:34:06.0 2005-09-09 12:34:06.0 2005-09-09 12:34:06.0 7 7 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
23 24 104 90 3 15 346 4 4+4y-4 = -6-4 2005-09-09 12:34:06.0 2005-09-09 12:34:09.0 2005-09-09 12:34:09.0 2005-09-09 12:34:17.0 11 11 NaN 1 0 0 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol......
24 25 104 90 3 15 346 4 4y = -6-4 2005-09-09 12:34:17.0 2005-09-09 12:34:25.0 2005-09-09 12:34:25.0 2005-09-09 12:34:25.0 8 8 NaN 1 0 0 1 [SkillRule: Consolidate vars with coeff; CLT]...
25 26 104 90 3 15 346 4 FinalAnswer 2005-09-09 12:34:25.0 2005-09-09 12:34:29.0 2005-09-09 12:34:33.0 2005-09-09 12:34:33.0 8 NaN 8 0 1 0 1 combine-like-terms-sp...
26 27 104 90 3 15 346 4 4y = -10 2005-09-09 12:34:33.0 2005-09-09 12:34:42.0 2005-09-09 12:34:42.0 2005-09-09 12:34:42.0 9 9 NaN 1 0 0 1 [SkillRule: Remove positive coefficient; {ax/b......
27 28 104 90 3 15 346 4 4y/4 = -10/4 2005-09-09 12:34:42.0 2005-09-09 12:34:46.0 2005-09-09 12:34:46.0 2005-09-09 12:35:02.0 20 20 NaN 1 0 0 2 [SkillRule: Multiply/Divide; [Typein Skill: {R......
28 29 104 89 3 16 668 1 -7 = -5(y+7) 2005-09-09 12:35:08.0 2005-09-09 12:36:27.0 2005-09-09 12:37:01.0 2005-09-09 12:37:01.0 113 NaN 113 0 2 3 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
29 30 104 89 3 16 668 1 7/5 = y+7 2005-09-09 12:37:01.0 2005-09-09 12:37:09.0 2005-09-09 12:37:09.0 2005-09-09 12:37:09.0 8 8 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
30 31 104 89 3 16 668 2 -7(x+9) = -5 2005-09-09 12:37:17.0 2005-09-09 12:38:15.0 2005-09-09 12:38:15.0 2005-09-09 12:38:15.0 58 58 NaN 1 0 0 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
31 32 104 89 3 16 668 2 x+9 = 5/7 2005-09-09 12:38:15.0 2005-09-09 12:38:21.0 2005-09-09 12:38:21.0 2005-09-09 12:38:21.0 6 6 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
32 33 104 89 3 16 668 3 5 = 8(y+1) 2005-09-09 12:38:25.0 2005-09-09 12:38:33.0 2005-09-09 12:38:33.0 2005-09-09 12:38:33.0 8 8 NaN 1 0 0 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
33 34 104 89 3 16 668 3 5/8 = y+1 2005-09-09 12:38:33.0 2005-09-09 12:38:40.0 2005-09-09 12:38:40.0 2005-09-09 12:38:40.0 7 7 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
34 35 104 89 3 16 668 4 0 = -3(x-5) 2005-09-09 12:38:44.0 2005-09-09 12:38:49.0 2005-09-09 12:38:49.0 2005-09-09 12:38:49.0 5 5 NaN 1 0 0 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
35 36 104 89 3 16 668 4 0 = x-5 2005-09-09 12:38:49.0 2005-09-09 12:38:56.0 2005-09-09 12:38:56.0 2005-09-09 12:38:56.0 7 7 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
36 37 104 84 3 13 668 1 -5(y-10) = 3 2005-09-09 12:39:01.0 2005-09-09 12:39:07.0 2005-09-09 12:39:07.0 2005-09-09 12:39:07.0 6 6 NaN 1 0 0 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
37 38 104 84 3 13 668 1 -5(y-10)/-5 = 3/-5 2005-09-09 12:39:07.0 2005-09-09 12:39:13.0 2005-09-09 12:39:13.0 2005-09-09 12:39:18.0 11 11 NaN 1 0 0 2 [SkillRule: Calculate Eliminate Parens; [Typei......
38 39 104 84 3 13 668 1 y-10 = 3/-5 2005-09-09 12:39:18.0 2005-09-09 12:39:26.0 2005-09-09 12:39:26.0 2005-09-09 12:39:26.0 8 8 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
39 40 104 84 3 13 668 1 y-10+10 = 3/-5+10 2005-09-09 12:39:26.0 2005-09-09 12:39:29.0 2005-09-09 12:39:29.0 2005-09-09 12:39:36.0 10 10 NaN 1 0 0 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol......
40 41 104 84 3 13 668 1 y = 3/-5+10 2005-09-09 12:39:36.0 2005-09-09 12:39:44.0 2005-09-09 12:40:14.0 2005-09-09 12:40:14.0 38 NaN 38 0 1 3 1 NaN...
41 42 104 84 3 13 668 1 FinalAnswer 2005-09-09 12:40:14.0 2005-09-09 12:40:36.0 2005-09-09 12:40:36.0 2005-09-09 12:41:14.0 39 39 NaN 1 0 0 2 simplify-fractions-sp~~combine-like-terms-sp...
42 43 104 84 3 13 668 1 y = -3/5+10 2005-09-09 12:40:36.0 2005-09-09 12:40:57.0 2005-09-09 12:40:57.0 2005-09-09 12:40:57.0 21 21 NaN 1 0 0 1 [SkillRule: Consolidate vars, no coeff; CLT]...
43 44 104 84 3 13 668 2 4(x-4) = -8 2005-09-09 12:41:48.0 2005-09-09 12:42:13.0 2005-09-09 12:42:13.0 2005-09-09 12:42:13.0 25 25 NaN 1 0 0 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
44 45 104 84 3 13 668 2 4(x-4)/4 = -8/4 2005-09-09 12:42:13.0 2005-09-09 12:42:18.0 2005-09-09 12:42:18.0 2005-09-09 12:42:23.0 10 10 NaN 1 0 0 2 [SkillRule: Calculate Eliminate Parens; [Typei......
45 46 104 84 3 13 668 2 x-4 = -8/4 2005-09-09 12:42:23.0 2005-09-09 12:42:32.0 2005-09-09 12:42:32.0 2005-09-09 12:42:32.0 9 9 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
46 47 104 84 3 13 668 2 x-4+4 = -8/4+4 2005-09-09 12:42:32.0 2005-09-09 12:42:35.0 2005-09-09 12:42:35.0 2005-09-09 12:42:55.0 23 23 NaN 1 0 0 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol......
47 48 104 84 3 13 668 2 x = -8/4+4 2005-09-09 12:42:55.0 2005-09-09 12:43:02.0 2005-09-09 12:43:02.0 2005-09-09 12:43:02.0 7 7 NaN 1 0 0 1 [SkillRule: Consolidate vars, no coeff; CLT]...
48 49 104 84 3 13 668 2 FinalAnswer 2005-09-09 12:43:02.0 2005-09-09 12:43:25.0 2005-09-09 12:43:25.0 2005-09-09 12:43:25.0 23 23 NaN 1 0 0 1 simplify-fractions-sp~~combine-like-terms-sp...
49 50 104 84 3 13 668 3 5 = 4(x-3) 2005-09-09 12:43:34.0 2005-09-09 12:43:41.0 2005-09-09 12:43:41.0 2005-09-09 12:43:41.0 7 7 NaN 1 0 0 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
50 51 104 84 3 13 668 3 5/4 = 4(x-3)/4 2005-09-09 12:43:41.0 2005-09-09 12:43:45.0 2005-09-09 12:43:45.0 2005-09-09 12:43:49.0 8 8 NaN 1 0 0 2 [SkillRule: Calculate Eliminate Parens; [Typei......
51 52 104 84 3 13 668 3 5/4 = x-3 2005-09-09 12:43:49.0 2005-09-09 12:44:09.0 2005-09-09 12:44:09.0 2005-09-09 12:44:09.0 20 20 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
52 53 104 84 3 13 668 3 5/4+3 = x-3+3 2005-09-09 12:44:09.0 2005-09-09 12:44:12.0 2005-09-09 12:44:12.0 2005-09-09 12:44:19.0 10 10 NaN 1 0 0 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol......
53 54 104 84 3 13 668 3 5/4+3 = x 2005-09-09 12:44:19.0 2005-09-09 12:44:35.0 2005-09-09 12:44:35.0 2005-09-09 12:44:35.0 16 16 NaN 1 0 0 1 [SkillRule: Consolidate vars, no coeff; CLT]...
54 55 104 84 3 13 668 3 FinalAnswer 2005-09-09 12:44:35.0 2005-09-09 12:44:48.0 2005-09-09 12:44:48.0 2005-09-09 12:44:48.0 13 13 NaN 1 0 0 1 simplify-fractions-sp~~combine-like-terms-sp...
55 56 104 83 3 14 936 1 0.1 = -42.3(y-83.7) 2005-09-09 12:44:58.0 2005-09-09 12:45:42.0 2005-09-09 12:45:42.0 2005-09-09 12:45:42.0 44 44 NaN 1 0 0 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
56 57 104 83 3 14 936 1 -0.00236407 = y-83.7 2005-09-09 12:45:42.0 2005-09-09 12:45:56.0 2005-09-09 12:45:56.0 2005-09-09 12:45:56.0 14 14 NaN 1 0 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
57 58 104 83 3 14 667 1 1.8(y-9.8) = -2.4 2005-09-09 12:46:01.0 2005-09-09 12:46:48.0 2005-09-09 12:46:57.0 2005-09-09 12:46:57.0 56 NaN 56 0 1 0 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
58 59 104 83 3 14 667 1 y-9.8 = -1.33333333 2005-09-09 12:46:57.0 2005-09-09 12:47:49.0 2005-09-09 12:48:21.0 2005-09-09 12:48:21.0 84 NaN 84 0 1 0 1 [SkillRule: Remove constant; {ax+b=c, positive......
59 60 104 83 3 14 667 2 -5.5(y-1.9) = -9.6 2005-09-09 12:48:26.0 2005-09-09 12:49:24.0 2005-09-09 12:49:24.0 2005-09-09 12:49:24.0 58 58 NaN 1 0 0 1 [SkillRule: Eliminate Parens; {CLT nested; CLT......
............................................................
\n", "

809694 rows \u00d7 21 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 116, "text": [ " Row Anon Student Id Problem Hierarchy Problem Unit Problem Section \\\n", "0 1 104 87 3 17 \n", "1 2 104 87 3 17 \n", "2 3 104 87 3 17 \n", "3 4 104 87 3 17 \n", "4 5 104 87 3 17 \n", "5 6 104 87 3 17 \n", "6 7 104 87 3 17 \n", "7 8 104 87 3 17 \n", "8 9 104 87 3 17 \n", "9 10 104 87 3 17 \n", "10 11 104 90 3 15 \n", "11 12 104 90 3 15 \n", "12 13 104 90 3 15 \n", "13 14 104 90 3 15 \n", "14 15 104 90 3 15 \n", "15 16 104 90 3 15 \n", "16 17 104 90 3 15 \n", "17 18 104 90 3 15 \n", "18 19 104 90 3 15 \n", "19 20 104 90 3 15 \n", "20 21 104 90 3 15 \n", "21 22 104 90 3 15 \n", "22 23 104 90 3 15 \n", "23 24 104 90 3 15 \n", "24 25 104 90 3 15 \n", "25 26 104 90 3 15 \n", "26 27 104 90 3 15 \n", "27 28 104 90 3 15 \n", "28 29 104 89 3 16 \n", "29 30 104 89 3 16 \n", "30 31 104 89 3 16 \n", "31 32 104 89 3 16 \n", "32 33 104 89 3 16 \n", "33 34 104 89 3 16 \n", "34 35 104 89 3 16 \n", "35 36 104 89 3 16 \n", "36 37 104 84 3 13 \n", "37 38 104 84 3 13 \n", "38 39 104 84 3 13 \n", "39 40 104 84 3 13 \n", "40 41 104 84 3 13 \n", "41 42 104 84 3 13 \n", "42 43 104 84 3 13 \n", "43 44 104 84 3 13 \n", "44 45 104 84 3 13 \n", "45 46 104 84 3 13 \n", "46 47 104 84 3 13 \n", "47 48 104 84 3 13 \n", "48 49 104 84 3 13 \n", "49 50 104 84 3 13 \n", "50 51 104 84 3 13 \n", "51 52 104 84 3 13 \n", "52 53 104 84 3 13 \n", "53 54 104 84 3 13 \n", "54 55 104 84 3 13 \n", "55 56 104 83 3 14 \n", "56 57 104 83 3 14 \n", "57 58 104 83 3 14 \n", "58 59 104 83 3 14 \n", "59 60 104 83 3 14 \n", " ... ... ... ... ... \n", "\n", " Problem Name Problem View Step Name Step Start Time \\\n", "0 1046 1 3(x+2) = 15 2005-09-09 12:24:35.0 \n", "1 1046 1 x+2 = 5 2005-09-09 12:25:15.0 \n", "2 346 1 2-8y = -4 2005-09-09 12:25:36.0 \n", "3 346 1 -8y = -6 2005-09-09 12:26:12.0 \n", "4 346 2 -7y-5 = -4 2005-09-09 12:26:38.0 \n", "5 346 2 -7y = 1 2005-09-09 12:28:36.0 \n", "6 346 3 7y+4 = 7 2005-09-09 12:28:57.0 \n", "7 346 3 7y = 3 2005-09-09 12:29:09.0 \n", "8 346 4 -5+9y = -6 2005-09-09 12:29:19.0 \n", "9 346 4 9y = -1 2005-09-09 12:29:31.0 \n", "10 346 1 -7-3x = -2 2005-09-09 12:29:41.0 \n", "11 346 1 -7-3x+7 = -2+7 2005-09-09 12:30:27.0 \n", "12 346 1 -3x = 5 2005-09-09 12:30:49.0 \n", "13 346 1 -3x/-3 = 5/-3 2005-09-09 12:31:04.0 \n", "14 346 2 -9 = 8y+9 2005-09-09 12:31:16.0 \n", "15 346 2 -9-9 = 8y+9-9 2005-09-09 12:31:29.0 \n", "16 346 2 -18 = 8y 2005-09-09 12:31:39.0 \n", "17 346 2 -18/8 = 8y/8 2005-09-09 12:31:44.0 \n", "18 346 3 -2-2x = 9 2005-09-09 12:33:01.0 \n", "19 346 3 -2-2x+2 = 9+2 2005-09-09 12:33:32.0 \n", "20 346 3 -2x = 11 2005-09-09 12:33:40.0 \n", "21 346 3 -2x/-2 = 11/-2 2005-09-09 12:33:46.0 \n", "22 346 4 4+4y = -6 2005-09-09 12:33:59.0 \n", "23 346 4 4+4y-4 = -6-4 2005-09-09 12:34:06.0 \n", "24 346 4 4y = -6-4 2005-09-09 12:34:17.0 \n", "25 346 4 FinalAnswer 2005-09-09 12:34:25.0 \n", "26 346 4 4y = -10 2005-09-09 12:34:33.0 \n", "27 346 4 4y/4 = -10/4 2005-09-09 12:34:42.0 \n", "28 668 1 -7 = -5(y+7) 2005-09-09 12:35:08.0 \n", "29 668 1 7/5 = y+7 2005-09-09 12:37:01.0 \n", "30 668 2 -7(x+9) = -5 2005-09-09 12:37:17.0 \n", "31 668 2 x+9 = 5/7 2005-09-09 12:38:15.0 \n", "32 668 3 5 = 8(y+1) 2005-09-09 12:38:25.0 \n", "33 668 3 5/8 = y+1 2005-09-09 12:38:33.0 \n", "34 668 4 0 = -3(x-5) 2005-09-09 12:38:44.0 \n", "35 668 4 0 = x-5 2005-09-09 12:38:49.0 \n", "36 668 1 -5(y-10) = 3 2005-09-09 12:39:01.0 \n", "37 668 1 -5(y-10)/-5 = 3/-5 2005-09-09 12:39:07.0 \n", "38 668 1 y-10 = 3/-5 2005-09-09 12:39:18.0 \n", "39 668 1 y-10+10 = 3/-5+10 2005-09-09 12:39:26.0 \n", "40 668 1 y = 3/-5+10 2005-09-09 12:39:36.0 \n", "41 668 1 FinalAnswer 2005-09-09 12:40:14.0 \n", "42 668 1 y = -3/5+10 2005-09-09 12:40:36.0 \n", "43 668 2 4(x-4) = -8 2005-09-09 12:41:48.0 \n", "44 668 2 4(x-4)/4 = -8/4 2005-09-09 12:42:13.0 \n", "45 668 2 x-4 = -8/4 2005-09-09 12:42:23.0 \n", "46 668 2 x-4+4 = -8/4+4 2005-09-09 12:42:32.0 \n", "47 668 2 x = -8/4+4 2005-09-09 12:42:55.0 \n", "48 668 2 FinalAnswer 2005-09-09 12:43:02.0 \n", "49 668 3 5 = 4(x-3) 2005-09-09 12:43:34.0 \n", "50 668 3 5/4 = 4(x-3)/4 2005-09-09 12:43:41.0 \n", "51 668 3 5/4 = x-3 2005-09-09 12:43:49.0 \n", "52 668 3 5/4+3 = x-3+3 2005-09-09 12:44:09.0 \n", "53 668 3 5/4+3 = x 2005-09-09 12:44:19.0 \n", "54 668 3 FinalAnswer 2005-09-09 12:44:35.0 \n", "55 936 1 0.1 = -42.3(y-83.7) 2005-09-09 12:44:58.0 \n", "56 936 1 -0.00236407 = y-83.7 2005-09-09 12:45:42.0 \n", "57 667 1 1.8(y-9.8) = -2.4 2005-09-09 12:46:01.0 \n", "58 667 1 y-9.8 = -1.33333333 2005-09-09 12:46:57.0 \n", "59 667 2 -5.5(y-1.9) = -9.6 2005-09-09 12:48:26.0 \n", " ... ... ... ... \n", "\n", " First Transaction Time Correct Transaction Time Step End Time \\\n", "0 2005-09-09 12:24:49.0 2005-09-09 12:25:15.0 2005-09-09 12:25:15.0 \n", "1 2005-09-09 12:25:31.0 2005-09-09 12:25:31.0 2005-09-09 12:25:31.0 \n", "2 2005-09-09 12:25:43.0 2005-09-09 12:26:12.0 2005-09-09 12:26:12.0 \n", "3 2005-09-09 12:26:34.0 2005-09-09 12:26:34.0 2005-09-09 12:26:34.0 \n", "4 2005-09-09 12:28:36.0 2005-09-09 12:28:36.0 2005-09-09 12:28:36.0 \n", "5 2005-09-09 12:28:43.0 2005-09-09 12:28:51.0 2005-09-09 12:28:51.0 \n", "6 2005-09-09 12:29:09.0 2005-09-09 12:29:09.0 2005-09-09 12:29:09.0 \n", "7 2005-09-09 12:29:14.0 2005-09-09 12:29:14.0 2005-09-09 12:29:14.0 \n", "8 2005-09-09 12:29:31.0 2005-09-09 12:29:31.0 2005-09-09 12:29:31.0 \n", "9 2005-09-09 12:29:36.0 2005-09-09 12:29:36.0 2005-09-09 12:29:36.0 \n", "10 2005-09-09 12:30:27.0 2005-09-09 12:30:27.0 2005-09-09 12:30:27.0 \n", "11 2005-09-09 12:30:34.0 2005-09-09 12:30:45.0 2005-09-09 12:30:49.0 \n", "12 2005-09-09 12:31:04.0 2005-09-09 12:31:04.0 2005-09-09 12:31:04.0 \n", "13 2005-09-09 12:31:07.0 2005-09-09 12:31:07.0 2005-09-09 12:31:12.0 \n", "14 2005-09-09 12:31:29.0 2005-09-09 12:31:29.0 2005-09-09 12:31:29.0 \n", "15 2005-09-09 12:31:32.0 2005-09-09 12:31:32.0 2005-09-09 12:31:39.0 \n", "16 2005-09-09 12:31:44.0 2005-09-09 12:31:44.0 2005-09-09 12:31:44.0 \n", "17 2005-09-09 12:31:46.0 2005-09-09 12:31:46.0 2005-09-09 12:32:56.0 \n", "18 2005-09-09 12:33:22.0 2005-09-09 12:33:32.0 2005-09-09 12:33:32.0 \n", "19 2005-09-09 12:33:37.0 2005-09-09 12:33:37.0 2005-09-09 12:33:40.0 \n", "20 2005-09-09 12:33:46.0 2005-09-09 12:33:46.0 2005-09-09 12:33:46.0 \n", "21 2005-09-09 12:33:51.0 2005-09-09 12:33:51.0 2005-09-09 12:33:55.0 \n", "22 2005-09-09 12:34:06.0 2005-09-09 12:34:06.0 2005-09-09 12:34:06.0 \n", "23 2005-09-09 12:34:09.0 2005-09-09 12:34:09.0 2005-09-09 12:34:17.0 \n", "24 2005-09-09 12:34:25.0 2005-09-09 12:34:25.0 2005-09-09 12:34:25.0 \n", "25 2005-09-09 12:34:29.0 2005-09-09 12:34:33.0 2005-09-09 12:34:33.0 \n", "26 2005-09-09 12:34:42.0 2005-09-09 12:34:42.0 2005-09-09 12:34:42.0 \n", "27 2005-09-09 12:34:46.0 2005-09-09 12:34:46.0 2005-09-09 12:35:02.0 \n", "28 2005-09-09 12:36:27.0 2005-09-09 12:37:01.0 2005-09-09 12:37:01.0 \n", "29 2005-09-09 12:37:09.0 2005-09-09 12:37:09.0 2005-09-09 12:37:09.0 \n", "30 2005-09-09 12:38:15.0 2005-09-09 12:38:15.0 2005-09-09 12:38:15.0 \n", "31 2005-09-09 12:38:21.0 2005-09-09 12:38:21.0 2005-09-09 12:38:21.0 \n", "32 2005-09-09 12:38:33.0 2005-09-09 12:38:33.0 2005-09-09 12:38:33.0 \n", "33 2005-09-09 12:38:40.0 2005-09-09 12:38:40.0 2005-09-09 12:38:40.0 \n", "34 2005-09-09 12:38:49.0 2005-09-09 12:38:49.0 2005-09-09 12:38:49.0 \n", "35 2005-09-09 12:38:56.0 2005-09-09 12:38:56.0 2005-09-09 12:38:56.0 \n", "36 2005-09-09 12:39:07.0 2005-09-09 12:39:07.0 2005-09-09 12:39:07.0 \n", "37 2005-09-09 12:39:13.0 2005-09-09 12:39:13.0 2005-09-09 12:39:18.0 \n", "38 2005-09-09 12:39:26.0 2005-09-09 12:39:26.0 2005-09-09 12:39:26.0 \n", "39 2005-09-09 12:39:29.0 2005-09-09 12:39:29.0 2005-09-09 12:39:36.0 \n", "40 2005-09-09 12:39:44.0 2005-09-09 12:40:14.0 2005-09-09 12:40:14.0 \n", "41 2005-09-09 12:40:36.0 2005-09-09 12:40:36.0 2005-09-09 12:41:14.0 \n", "42 2005-09-09 12:40:57.0 2005-09-09 12:40:57.0 2005-09-09 12:40:57.0 \n", "43 2005-09-09 12:42:13.0 2005-09-09 12:42:13.0 2005-09-09 12:42:13.0 \n", "44 2005-09-09 12:42:18.0 2005-09-09 12:42:18.0 2005-09-09 12:42:23.0 \n", "45 2005-09-09 12:42:32.0 2005-09-09 12:42:32.0 2005-09-09 12:42:32.0 \n", "46 2005-09-09 12:42:35.0 2005-09-09 12:42:35.0 2005-09-09 12:42:55.0 \n", "47 2005-09-09 12:43:02.0 2005-09-09 12:43:02.0 2005-09-09 12:43:02.0 \n", "48 2005-09-09 12:43:25.0 2005-09-09 12:43:25.0 2005-09-09 12:43:25.0 \n", "49 2005-09-09 12:43:41.0 2005-09-09 12:43:41.0 2005-09-09 12:43:41.0 \n", "50 2005-09-09 12:43:45.0 2005-09-09 12:43:45.0 2005-09-09 12:43:49.0 \n", "51 2005-09-09 12:44:09.0 2005-09-09 12:44:09.0 2005-09-09 12:44:09.0 \n", "52 2005-09-09 12:44:12.0 2005-09-09 12:44:12.0 2005-09-09 12:44:19.0 \n", "53 2005-09-09 12:44:35.0 2005-09-09 12:44:35.0 2005-09-09 12:44:35.0 \n", "54 2005-09-09 12:44:48.0 2005-09-09 12:44:48.0 2005-09-09 12:44:48.0 \n", "55 2005-09-09 12:45:42.0 2005-09-09 12:45:42.0 2005-09-09 12:45:42.0 \n", "56 2005-09-09 12:45:56.0 2005-09-09 12:45:56.0 2005-09-09 12:45:56.0 \n", "57 2005-09-09 12:46:48.0 2005-09-09 12:46:57.0 2005-09-09 12:46:57.0 \n", "58 2005-09-09 12:47:49.0 2005-09-09 12:48:21.0 2005-09-09 12:48:21.0 \n", "59 2005-09-09 12:49:24.0 2005-09-09 12:49:24.0 2005-09-09 12:49:24.0 \n", " ... ... ... \n", "\n", " Step Duration (sec) Correct Step Duration (sec) \\\n", "0 40 NaN \n", "1 16 16 \n", "2 36 NaN \n", "3 22 22 \n", "4 118 118 \n", "5 15 NaN \n", "6 12 12 \n", "7 5 5 \n", "8 12 12 \n", "9 5 5 \n", "10 46 46 \n", "11 22 NaN \n", "12 15 15 \n", "13 8 8 \n", "14 13 13 \n", "15 10 10 \n", "16 5 5 \n", "17 72 72 \n", "18 31 NaN \n", "19 8 8 \n", "20 6 6 \n", "21 9 9 \n", "22 7 7 \n", "23 11 11 \n", "24 8 8 \n", "25 8 NaN \n", "26 9 9 \n", "27 20 20 \n", "28 113 NaN \n", "29 8 8 \n", "30 58 58 \n", "31 6 6 \n", "32 8 8 \n", "33 7 7 \n", "34 5 5 \n", "35 7 7 \n", "36 6 6 \n", "37 11 11 \n", "38 8 8 \n", "39 10 10 \n", "40 38 NaN \n", "41 39 39 \n", "42 21 21 \n", "43 25 25 \n", "44 10 10 \n", "45 9 9 \n", "46 23 23 \n", "47 7 7 \n", "48 23 23 \n", "49 7 7 \n", "50 8 8 \n", "51 20 20 \n", "52 10 10 \n", "53 16 16 \n", "54 13 13 \n", "55 44 44 \n", "56 14 14 \n", "57 56 NaN \n", "58 84 NaN \n", "59 58 58 \n", " ... ... \n", "\n", " Error Step Duration (sec) Correct First Attempt Incorrects Hints \\\n", "0 40 0 2 3 \n", "1 NaN 1 0 0 \n", "2 36 0 2 3 \n", "3 NaN 1 0 0 \n", "4 NaN 1 0 0 \n", "5 15 0 1 0 \n", "6 NaN 1 0 0 \n", "7 NaN 1 0 0 \n", "8 NaN 1 0 0 \n", "9 NaN 1 0 0 \n", "10 NaN 1 0 0 \n", "11 22 0 1 0 \n", "12 NaN 1 0 0 \n", "13 NaN 1 0 0 \n", "14 NaN 1 0 0 \n", "15 NaN 1 0 0 \n", "16 NaN 1 0 0 \n", "17 NaN 1 2 0 \n", "18 31 0 1 0 \n", "19 NaN 1 0 0 \n", "20 NaN 1 0 0 \n", "21 NaN 1 0 0 \n", "22 NaN 1 0 0 \n", "23 NaN 1 0 0 \n", "24 NaN 1 0 0 \n", "25 8 0 1 0 \n", "26 NaN 1 0 0 \n", "27 NaN 1 0 0 \n", "28 113 0 2 3 \n", "29 NaN 1 0 0 \n", "30 NaN 1 0 0 \n", "31 NaN 1 0 0 \n", "32 NaN 1 0 0 \n", "33 NaN 1 0 0 \n", "34 NaN 1 0 0 \n", "35 NaN 1 0 0 \n", "36 NaN 1 0 0 \n", "37 NaN 1 0 0 \n", "38 NaN 1 0 0 \n", "39 NaN 1 0 0 \n", "40 38 0 1 3 \n", "41 NaN 1 0 0 \n", "42 NaN 1 0 0 \n", "43 NaN 1 0 0 \n", "44 NaN 1 0 0 \n", "45 NaN 1 0 0 \n", "46 NaN 1 0 0 \n", "47 NaN 1 0 0 \n", "48 NaN 1 0 0 \n", "49 NaN 1 0 0 \n", "50 NaN 1 0 0 \n", "51 NaN 1 0 0 \n", "52 NaN 1 0 0 \n", "53 NaN 1 0 0 \n", "54 NaN 1 0 0 \n", "55 NaN 1 0 0 \n", "56 NaN 1 0 0 \n", "57 56 0 1 0 \n", "58 84 0 1 0 \n", "59 NaN 1 0 0 \n", " ... ... ... ... \n", "\n", " Corrects KC(Default) \n", "0 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", "1 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "2 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "3 1 [SkillRule: Remove coefficient; {ax+b=c, divid... ... \n", "4 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "5 1 [SkillRule: Remove coefficient; {ax+b=c, divid... ... \n", "6 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "7 1 [SkillRule: Remove positive coefficient; {ax/b... ... \n", "8 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "9 1 [SkillRule: Remove positive coefficient; {ax/b... ... \n", "10 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "11 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol... ... \n", "12 1 [SkillRule: Remove coefficient; {ax+b=c, divid... ... \n", "13 2 [SkillRule: Multiply/Divide; [Typein Skill: {R... ... \n", "14 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "15 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol... ... \n", "16 1 [SkillRule: Remove positive coefficient; {ax/b... ... \n", "17 2 [SkillRule: Multiply/Divide; [Typein Skill: {R... ... \n", "18 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "19 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol... ... \n", "20 1 [SkillRule: Remove coefficient; {ax+b=c, divid... ... \n", "21 2 [SkillRule: Multiply/Divide; [Typein Skill: {R... ... \n", "22 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "23 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol... ... \n", "24 1 [SkillRule: Consolidate vars with coeff; CLT] ... \n", "25 1 combine-like-terms-sp ... \n", "26 1 [SkillRule: Remove positive coefficient; {ax/b... ... \n", "27 2 [SkillRule: Multiply/Divide; [Typein Skill: {R... ... \n", "28 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", "29 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "30 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", "31 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "32 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", "33 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "34 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", "35 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "36 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", "37 2 [SkillRule: Calculate Eliminate Parens; [Typei... ... \n", "38 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "39 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol... ... \n", "40 1 NaN ... \n", "41 2 simplify-fractions-sp~~combine-like-terms-sp ... \n", "42 1 [SkillRule: Consolidate vars, no coeff; CLT] ... \n", "43 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", "44 2 [SkillRule: Calculate Eliminate Parens; [Typei... ... \n", "45 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "46 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol... ... \n", "47 1 [SkillRule: Consolidate vars, no coeff; CLT] ... \n", "48 1 simplify-fractions-sp~~combine-like-terms-sp ... \n", "49 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", "50 2 [SkillRule: Calculate Eliminate Parens; [Typei... ... \n", "51 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "52 2 [SkillRule: Add/Subtract; [Typein Skill: {Isol... ... \n", "53 1 [SkillRule: Consolidate vars, no coeff; CLT] ... \n", "54 1 simplify-fractions-sp~~combine-like-terms-sp ... \n", "55 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", "56 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "57 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", "58 1 [SkillRule: Remove constant; {ax+b=c, positive... ... \n", "59 1 [SkillRule: Eliminate Parens; {CLT nested; CLT... ... \n", " ... ... \n", "\n", "[809694 rows x 21 columns]" ] } ], "prompt_number": 116 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Create the testing dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Create an empty testing dataframe\n", "testdf = pd.DataFrame(columns=df.columns)\n", "\n", "# Create the testing set\n", "unique_units = list(set(df['Problem Unit']))\n", "for i in range(len(unique_units)):\n", " # Get the last problem of the current problem unit\n", " lastProb = list(df[df['Problem Unit'] == unique_units[i]]['Problem Name'])[-1]\n", " \n", " # Get all the rows corresponding to the last problem for the given problem unit\n", " lastProbRows = a56data[(df['Problem Unit'] == unique_units[i]) & (df['Problem Name']==lastProb)]\n", " \n", " # Concatenate test dataframe with the rows just found\n", " testdf = pd.concat([testdf,lastProbRows])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 118 }, { "cell_type": "code", "collapsed": false, "input": [ "# Create a training dataframe that is equal to original dataframe with all the test cases removed\n", "trainIndex = df.index - testdf.index\n", "traindf = df.loc[trainIndex]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 119 }, { "cell_type": "code", "collapsed": false, "input": [ "# Get the target feature within the test set: the Correct First Attmpt\n", "CFAs = np.array(testdf['Correct First Attempt'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 120 }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Test functions" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Define a helper function for calculating the root-mean-square error\n", "def RMSE(p,y):\n", " ''' The Root-Mean-Square Error takes the predicted values p for the target\n", " variable y and takes the square root of the mean of the square of their\n", " differences. '''\n", " return np.sqrt(np.sum(np.square(p-y))/len(y))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 121 }, { "cell_type": "code", "collapsed": false, "input": [ "# Test the RMSE for an array of all zeros\n", "p = np.zeros(len(CFAs))\n", "print 'An array of all zeros gives an RMSE of:',RMSE(p,CFAs)\n", "\n", "# Test the RMSE for an array of all ones\n", "p = np.ones(len(CFAs))\n", "print 'An array of all ones gives an RMSE of:',RMSE(p,CFAs)\n", "\n", "# Test the RMSE for an array of random 0s and 1s\n", "p = np.random.randint(0,2,len(CFAs)).astype(float)\n", "print 'An array of random ones and zeros gives an RMSE of:',RMSE(p,CFAs)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "An array of all zeros gives an RMSE of: 0.863841709437\n", "An array of all ones gives an RMSE of: 0.503763338322\n", "An array of random ones and zeros gives an RMSE of: 0.70685723912\n" ] } ], "prompt_number": 122 }, { "cell_type": "code", "collapsed": false, "input": [ "def error_metrics(p,yy):\n", " '''Calculates the error metrics, i.e. the precision and recall.\n", " Precision = True positives / Predicted positives\n", " Recall = True positives / Actual positives'''\n", " predicted_positives = len(p[p==1])\n", " actual_positives = len(yy[yy==1])\n", " # The predicted values for when actual values are 1\n", " pp = p[yy==1]\n", " # True positives are when these predicted values are also 1\n", " true_positives = len(pp[pp==1])\n", " false_positives = len(yy) - true_positives\n", " \n", " precision = float(true_positives) / float(predicted_positives)\n", " recall = float(true_positives) / float(actual_positives)\n", " \n", " F_1score = 2.0 * precision * recall / (precision + recall)\n", " \n", " print 'Root-mean-square error: ', RMSE(p,yy)\n", " \n", " print '\\nPrecision: Of all predicted CFAs, what fraction actually succeeded?'\n", " print precision\n", " \n", " print '\\nRecall: Of all actual CFAs, what fraction did we predict correctly?'\n", " print recall\n", " \n", " print '\\nF_1 Score: ', F_1score" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 125 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Machine learning" ] }, { "cell_type": "code", "collapsed": false, "input": [ "traindf.columns" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 20, "text": [ "Index([u'Row', u'Anon Student Id', u'Problem Hierarchy', u'Problem Unit', u'Problem Section', u'Problem Name', u'Problem View', u'Step Name', u'Step Start Time', u'First Transaction Time', u'Correct Transaction Time', u'Step End Time', u'Step Duration (sec)', u'Correct Step Duration (sec)', u'Error Step Duration (sec)', u'Correct First Attempt', u'Incorrects', u'Hints', u'Corrects', u'KC(Default)', u'Opportunity(Default)'], dtype='object')" ] } ], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "# Define a helper function to normalize the feature matrix X\n", "import numba\n", "def autonorm(X):\n", " ''' Calculates the mean and range of values of each column\n", " in the matrix (features) subtracts the mean from each value\n", " and divides by the range, thereby normalizing all values to\n", " fall between -1 and 1.'''\n", " x_means = np.mean(X,axis=0)\n", " x_means = np.ones(np.shape(X))*x_means\n", " x_maxs = np.max(X,axis=0)\n", " x_mins = np.min(X,axis=0)\n", " x_range = x_maxs - x_mins\n", " X_normd = (X - x_means) / x_range\n", " return X_normd\n", "\n", "autonorm_jit = numba.jit(autonorm)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 127 }, { "cell_type": "code", "collapsed": false, "input": [ "features_to_norm = ['Step Duration (sec)','Hints','Problem View']\n", "category_features = ['Anon Student Id', 'Problem Hierarchy', 'Problem Unit', 'Problem Section', 'Problem Name']\n", "target_feature = ['Correct First Attempt']\n", "features = features_to_norm + category_features\n", "all_features = features_to_norm + category_features + target_feature" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 150 }, { "cell_type": "code", "collapsed": false, "input": [ "X = traindf[all_features].dropna()\n", "y = np.array(X[target_feature]).astype(int).ravel()\n", "X_to_norm = np.array(X[features_to_norm])\n", "X_nonnorm = np.array(X[category_features])\n", "X_to_norm = autonorm(X_to_norm)\n", "X = np.concatenate((X_to_norm,X_nonnorm), axis=1)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 129 }, { "cell_type": "code", "collapsed": false, "input": [ "XX = testdf[all_features].dropna()\n", "yy = np.array(XX[target_feature]).astype(int).ravel()\n", "XX_to_norm = np.array(XX[features_to_norm])\n", "XX_nonnorm = np.array(XX[category_features])\n", "XX_to_norm = autonorm(XX_to_norm)\n", "XX = np.concatenate((XX_to_norm,XX_nonnorm), axis=1)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 130 }, { "cell_type": "code", "collapsed": false, "input": [ "from sklearn import tree\n", "model = tree.DecisionTreeClassifier()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 131 }, { "cell_type": "code", "collapsed": false, "input": [ "model = model.fit(X,y)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 133 }, { "cell_type": "code", "collapsed": false, "input": [ "p = model.predict(XX).astype(float)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 134 }, { "cell_type": "code", "collapsed": false, "input": [ "error_metrics(p,yy)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Root-mean-square error: 0.59582966275\n", "\n", "Precision: Of all predicted CFAs, what fraction actually succeeded?\n", "0.823697236354\n", "\n", "Recall: Of all actual CFAs, what fraction did we predict correctly?\n", "0.667139953785\n", "\n", "F_1 Score: 0.737198320284\n" ] } ], "prompt_number": 135 }, { "cell_type": "code", "collapsed": false, "input": [ "scores = cross_val_score(model, X, y)\n", "print 'Accuracy: {0:5.2f} (+/-{1:5.2f})'.format(scores.mean(), scores.std()*2)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Accuracy: 0.78 (+/- 0.01)\n" ] } ], "prompt_number": 139 }, { "cell_type": "code", "collapsed": false, "input": [ "from sklearn.ensemble import RandomForestClassifier\n", "\n", "n_ests = 70\n", "model = RandomForestClassifier(n_estimators=n_ests, criterion=\"entropy\", max_features=None)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 155 }, { "cell_type": "code", "collapsed": false, "input": [ "model = model.fit(X,y)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 156 }, { "cell_type": "code", "collapsed": false, "input": [ "p = model.predict(XX).astype(float)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 157 }, { "cell_type": "code", "collapsed": false, "input": [ "error_metrics(p,yy)\n", "importances = model.feature_importances_\n", "n_feats = len(features)\n", "feat_std = np.std([tree.feature_importances_ for tree in model.estimators_],axis=0)\n", "indices = np.argsort(importances)[::-1]\n", "\n", "# Print the feature ranking\n", "print(\"\\nFeature ranking:\")\n", "\n", "for f in range(n_feats):\n", " print '{0:2} - {1:20}: {2:5.4f} (std: {3:5.4f})'.format(f+1,features[indices[f]],importances[indices[f]],feat_std[indices[f]])" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Root-mean-square error: 0.500098691559\n", "\n", "Precision: Of all predicted CFAs, what fraction actually succeeded?\n", "0.834598896018\n", "\n", "Recall: Of all actual CFAs, what fraction did we predict correctly?\n", "0.829254711991\n", "\n", "F_1 Score: 0.831918221428\n", "\n", "Feature ranking:" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", " 1 - Anon Student Id : 0.2705 (std: 0.0026)\n", " 2 - Hints : 0.2593 (std: 0.0011)\n", " 3 - Step Duration (sec) : 0.2261 (std: 0.0032)\n", " 4 - Problem Name : 0.1125 (std: 0.0022)\n", " 5 - Problem View : 0.0444 (std: 0.0010)\n", " 6 - Problem Section : 0.0358 (std: 0.0014)\n", " 7 - Problem Hierarchy : 0.0306 (std: 0.0011)\n", " 8 - Problem Unit : 0.0208 (std: 0.0011)\n" ] } ], "prompt_number": 161 }, { "cell_type": "code", "collapsed": false, "input": [ "scores = cross_val_score(model, X, y)\n", "print 'Accuracy: {0:5.2f} (+/-{1:5.2f})'.format(scores.mean(), scores.std()*2)" ], "language": "python", "metadata": {}, "outputs": [ { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "pyerr", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0;32mprint\u001b[0m \u001b[0;34m'Accuracy: {0:5.2f} (+/-{1:5.2f})'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscores\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscores\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc\u001b[0m in \u001b[0;36mcross_val_score\u001b[0;34m(estimator, X, y, scoring, cv, n_jobs, verbose, fit_params, score_func, pre_dispatch)\u001b[0m\n\u001b[1;32m 1149\u001b[0m \u001b[0mtrain\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1150\u001b[0m fit_params)\n\u001b[0;32m-> 1151\u001b[0;31m for train, test in cv)\n\u001b[0m\u001b[1;32m 1152\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1153\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 651\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 652\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;32min\u001b[0m \u001b[0miterable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 653\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 654\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpre_dispatch\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"all\"\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mn_jobs\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc\u001b[0m in \u001b[0;36mdispatch\u001b[0;34m(self, func, args, kwargs)\u001b[0m\n\u001b[1;32m 398\u001b[0m \"\"\"\n\u001b[1;32m 399\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_pool\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 400\u001b[0;31m \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImmediateApply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 401\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 402\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0m_verbosity_filter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, func, args, kwargs)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;31m# Don't delay the application, to avoid keeping the input\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;31m# arguments in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 138\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 139\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc\u001b[0m in \u001b[0;36m_fit_and_score\u001b[0;34m(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters)\u001b[0m\n\u001b[1;32m 1237\u001b[0m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1238\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1239\u001b[0;31m \u001b[0mestimator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mfit_params\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1240\u001b[0m \u001b[0mtest_score\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mscorer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1241\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mreturn_train_score\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/ensemble/forest.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 277\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 278\u001b[0m verbose=self.verbose)\n\u001b[0;32m--> 279\u001b[0;31m for i in range(n_jobs))\n\u001b[0m\u001b[1;32m 280\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0;31m# Reduce\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 651\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_iterating\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 652\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mfunction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;32min\u001b[0m \u001b[0miterable\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 653\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdispatch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 654\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 655\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mpre_dispatch\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"all\"\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mn_jobs\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc\u001b[0m in \u001b[0;36mdispatch\u001b[0;34m(self, func, args, kwargs)\u001b[0m\n\u001b[1;32m 398\u001b[0m \"\"\"\n\u001b[1;32m 399\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_pool\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 400\u001b[0;31m \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mImmediateApply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 401\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jobs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 402\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0m_verbosity_filter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, func, args, kwargs)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;31m# Don't delay the application, to avoid keeping the input\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[0;31m# arguments in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 138\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 139\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 140\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/ensemble/forest.pyc\u001b[0m in \u001b[0;36m_parallel_build_trees\u001b[0;34m(trees, forest, X, y, sample_weight, verbose)\u001b[0m\n\u001b[1;32m 87\u001b[0m tree.fit(X, y,\n\u001b[1;32m 88\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcurr_sample_weight\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 89\u001b[0;31m check_input=False)\n\u001b[0m\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0mtree\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindices_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msample_counts\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/Users/mikhail/anaconda/lib/python2.7/site-packages/sklearn/tree/tree.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, sample_mask, X_argsorted, check_input, sample_weight)\u001b[0m\n\u001b[1;32m 265\u001b[0m max_leaf_nodes)\n\u001b[1;32m 266\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 267\u001b[0;31m \u001b[0mbuilder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtree_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msample_weight\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 268\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_outputs_\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "prompt_number": 162 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }