{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:26:38.391077",
     "start_time": "2017-11-17T21:26:35.782547"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <div class=\"bk-root\">\n",
       "        <a href=\"http://bokeh.pydata.org\" target=\"_blank\" class=\"bk-logo bk-logo-small bk-logo-notebook\"></a>\n",
       "        <span id=\"8ddb7ce4-4461-4d04-a332-7e6c859ff046\">Loading BokehJS ...</span>\n",
       "    </div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/javascript": [
       "\n",
       "(function(global) {\n",
       "  function now() {\n",
       "    return new Date();\n",
       "  }\n",
       "\n",
       "  var force = \"1\";\n",
       "\n",
       "  if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force !== \"\") {\n",
       "    window._bokeh_onload_callbacks = [];\n",
       "    window._bokeh_is_loading = undefined;\n",
       "  }\n",
       "\n",
       "\n",
       "  \n",
       "  if (typeof (window._bokeh_timeout) === \"undefined\" || force !== \"\") {\n",
       "    window._bokeh_timeout = Date.now() + 5000;\n",
       "    window._bokeh_failed_load = false;\n",
       "  }\n",
       "\n",
       "  var NB_LOAD_WARNING = {'data': {'text/html':\n",
       "     \"<div style='background-color: #fdd'>\\n\"+\n",
       "     \"<p>\\n\"+\n",
       "     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
       "     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
       "     \"</p>\\n\"+\n",
       "     \"<ul>\\n\"+\n",
       "     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n",
       "     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n",
       "     \"</ul>\\n\"+\n",
       "     \"<code>\\n\"+\n",
       "     \"from bokeh.resources import INLINE\\n\"+\n",
       "     \"output_notebook(resources=INLINE)\\n\"+\n",
       "     \"</code>\\n\"+\n",
       "     \"</div>\"}};\n",
       "\n",
       "  function display_loaded() {\n",
       "    if (window.Bokeh !== undefined) {\n",
       "      Bokeh.$(\"#8ddb7ce4-4461-4d04-a332-7e6c859ff046\").text(\"BokehJS successfully loaded.\");\n",
       "    } else if (Date.now() < window._bokeh_timeout) {\n",
       "      setTimeout(display_loaded, 100)\n",
       "    }\n",
       "  }\n",
       "\n",
       "  function run_callbacks() {\n",
       "    window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n",
       "    delete window._bokeh_onload_callbacks\n",
       "    console.info(\"Bokeh: all callbacks have finished\");\n",
       "  }\n",
       "\n",
       "  function load_libs(js_urls, callback) {\n",
       "    window._bokeh_onload_callbacks.push(callback);\n",
       "    if (window._bokeh_is_loading > 0) {\n",
       "      console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
       "      return null;\n",
       "    }\n",
       "    if (js_urls == null || js_urls.length === 0) {\n",
       "      run_callbacks();\n",
       "      return null;\n",
       "    }\n",
       "    console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
       "    window._bokeh_is_loading = js_urls.length;\n",
       "    for (var i = 0; i < js_urls.length; i++) {\n",
       "      var url = js_urls[i];\n",
       "      var s = document.createElement('script');\n",
       "      s.src = url;\n",
       "      s.async = false;\n",
       "      s.onreadystatechange = s.onload = function() {\n",
       "        window._bokeh_is_loading--;\n",
       "        if (window._bokeh_is_loading === 0) {\n",
       "          console.log(\"Bokeh: all BokehJS libraries loaded\");\n",
       "          run_callbacks()\n",
       "        }\n",
       "      };\n",
       "      s.onerror = function() {\n",
       "        console.warn(\"failed to load library \" + url);\n",
       "      };\n",
       "      console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
       "      document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "    }\n",
       "  };var element = document.getElementById(\"8ddb7ce4-4461-4d04-a332-7e6c859ff046\");\n",
       "  if (element == null) {\n",
       "    console.log(\"Bokeh: ERROR: autoload.js configured with elementid '8ddb7ce4-4461-4d04-a332-7e6c859ff046' but no matching script tag was found. \")\n",
       "    return false;\n",
       "  }\n",
       "\n",
       "  var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.js'];\n",
       "\n",
       "  var inline_js = [\n",
       "    function(Bokeh) {\n",
       "      Bokeh.set_log_level(\"info\");\n",
       "    },\n",
       "    \n",
       "    function(Bokeh) {\n",
       "      \n",
       "      Bokeh.$(\"#8ddb7ce4-4461-4d04-a332-7e6c859ff046\").text(\"BokehJS is loading...\");\n",
       "    },\n",
       "    function(Bokeh) {\n",
       "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n",
       "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n",
       "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n",
       "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n",
       "    }\n",
       "  ];\n",
       "\n",
       "  function run_inline_js() {\n",
       "    \n",
       "    if ((window.Bokeh !== undefined) || (force === \"1\")) {\n",
       "      for (var i = 0; i < inline_js.length; i++) {\n",
       "        inline_js[i](window.Bokeh);\n",
       "      }if (force === \"1\") {\n",
       "        display_loaded();\n",
       "      }} else if (Date.now() < window._bokeh_timeout) {\n",
       "      setTimeout(run_inline_js, 100);\n",
       "    } else if (!window._bokeh_failed_load) {\n",
       "      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
       "      window._bokeh_failed_load = true;\n",
       "    } else if (!force) {\n",
       "      var cell = $(\"#8ddb7ce4-4461-4d04-a332-7e6c859ff046\").parents('.cell').data().cell;\n",
       "      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
       "    }\n",
       "\n",
       "  }\n",
       "\n",
       "  if (window._bokeh_is_loading === 0) {\n",
       "    console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
       "    run_inline_js();\n",
       "  } else {\n",
       "    load_libs(js_urls, function() {\n",
       "      console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n",
       "      run_inline_js();\n",
       "    });\n",
       "  }\n",
       "}(this));"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Custom libraries\n",
    "from datascienceutils import plotter\n",
    "from datascienceutils import analyze\n",
    "from datascienceutils import predictiveModels as pm\n",
    "from datascienceutils import sklearnUtils as sku\n",
    "\n",
    "from IPython.display import Image\n",
    "# Standard libraries\n",
    "import json\n",
    "%matplotlib inline\n",
    "import datetime\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import random\n",
    "\n",
    "from sklearn import cross_validation\n",
    "from sklearn import metrics\n",
    "\n",
    "from bokeh.plotting import figure, show, output_file, output_notebook, ColumnDataSource\n",
    "from bokeh.charts import Histogram\n",
    "import bokeh\n",
    "output_notebook()\n",
    "\n",
    "# Set pandas display options\n",
    "#pd.set_option('display.width', pd.util.terminal.get_terminal_size()[0])\n",
    "pd.set_option('display.expand_frame_repr', False)\n",
    "pd.set_option('max_colwidth', 800)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:26:44.912135",
     "start_time": "2017-11-17T21:26:44.887664"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Source: UCI Machine Learning Repository, audiology data set\n",
    "# https://archive.ics.uci.edu/ml/machine-learning-databases/audiology/\n",
    "\n",
    "# Keep the raw records as a list of lines (newlines included).\n",
    "with open('./data/audiology.data', 'r') as data_file:\n",
    "    data = list(data_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:26:51.568329",
     "start_time": "2017-11-17T21:26:51.532891"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['WARNING: This database should be credited to the original owner whenever\\n',\n",
      " '         used for any publication whatsoever.\\n',\n",
      " '\\n',\n",
      " '1. Title: Audiology Database\\n',\n",
      " '\\n',\n",
      " '2. Sources:\\n',\n",
      " '    (a) Original Owner: Professor Jergen at Baylor College of Medicine\\n',\n",
      " '    (b) Donor: Bruce Porter (porter@fall.cs.utexas.EDU)\\n',\n",
      " '    (c) Date Received: 12/3/1987\\n',\n",
      " '\\n',\n",
      " '3. Past Usage: \\n',\n",
      " '   -- See: Bareiss, E. Ray, & Porter, Bruce (1987).  Protos: An '\n",
      " 'Exemplar-Based\\n',\n",
      " '      Learning Apprentice.  In the Proceedings of the 4th International\\n',\n",
      " '      Workshop on Machine Learning, 12-23, Irvine, CA: Morgan Kaufmann.\\n',\n",
      " '\\n',\n",
      " '4. Relevant Information:\\n',\n",
      " '   -- Contact Ray Bareiss (rbareiss@uunet.uucp ??), now at Vanderbilt \\n',\n",
      " '      University, for more information.\\n',\n",
      " '   -- Domain expert: Professor Craig Wier of the University of Texas, '\n",
      " 'Austin.\\n',\n",
      " '\\n',\n",
      " '5. Number of instances: 200 training cases, 26 test cases\\n',\n",
      " '\\n',\n",
      " '6. Number of attributes: ???\\n',\n",
      " '\\n',\n",
      " '7. Attribute information: (all attributes are nominally valued)\\n',\n",
      " '   1. case identifier.\\n',\n",
      " '   2. classification (24 classes)\\n',\n",
      " '   3. List of case features\\n',\n",
      " '      -- format: form f(v) should be read as \"feature f has value v\"\\n',\n",
      " '\\n',\n",
      " '8. Missing attribute values:\\n',\n",
      " '   -- This database does NOT use a standard set of attributes per '\n",
      " 'instance.\\n',\n",
      " '\\n',\n",
      " '9. Class Distribution: (in the training set)\\n',\n",
      " '    1. acoustic_neuroma: 1\\n',\n",
      " '    2. bells_palsy: 1\\n',\n",
      " '    3. cochlear_age: 46\\n',\n",
      " '    4. cochlear_age_and_noise: 18\\n',\n",
      " '    5. cochlear_age_plus_poss_menieres: 1\\n',\n",
      " '    6. cochlear_noise_and_heredity: 2\\n',\n",
      " '    7. cochlear_poss_noise: 16\\n',\n",
      " '    8. cochlear_unknown: 48\\n',\n",
      " '    9. conductive_discontinuity: 2\\n',\n",
      " '   10. conductive_fixation: 6\\n',\n",
      " '   11. mixed_cochlear_age_fixation: 1\\n',\n",
      " '   12. mixed_cochlear_age_otitis_media: 4\\n',\n",
      " '   13. mixed_cochlear_age_s_om: 2\\n',\n",
      " '   14. mixed_cochlear_unk_discontinuity: 2\\n',\n",
      " '   15. mixed_cochlear_unk_fixation: 5\\n',\n",
      " '   16. mixed_cochlear_unk_ser_om: 3\\n',\n",
      " '   17. mixed_poss_central_om: 1\\n',\n",
      " '   18. mixed_poss_noise_om: 2\\n',\n",
      " '   19. normal_ear: 20\\n',\n",
      " '   20. otitis_media: 4\\n',\n",
      " '   21. poss_central: 1\\n',\n",
      " '   22. possible_brainstem_disorder: 4\\n',\n",
      " '   23. possible_menieres: 8\\n',\n",
      " '   24. retrocochlear_unknown: 2\\n',\n",
      " '   --------------------Total: 200\\n']\n"
     ]
    }
   ],
   "source": [
    "from pprint import pprint\n",
    "with open('./data/audiology.names', 'r') as names_file:\n",
    "    names_lines = names_file.readlines()\n",
    "# Pretty-print the data set description, one source line per list entry.\n",
    "pprint(names_lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:26:59.041263",
     "start_time": "2017-11-17T21:26:59.026120"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "\n",
    "all_obs = set()\n",
    "\n",
    "def parse_line(line):\n",
    "    global all_obs\n",
    "    line = line.strip('\\n')\n",
    "    line = line.strip(']')\n",
    "    line = line.strip('[')\n",
    "    all_f = line.split(',')\n",
    "    caseid = all_f[0]\n",
    "    classif = all_f[1]\n",
    "    descs = all_f[2:]\n",
    "    descs[0] = descs[0].strip('[')\n",
    "    features = list()\n",
    "    for ea in descs:\n",
    "        all_obs.add(ea)\n",
    "    descs = ','.join(descs)\n",
    "    return [caseid, classif, descs]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:26:59.894302",
     "start_time": "2017-11-17T21:26:59.046235"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Parse every non-blank record, then build the frame in a single call.\n",
    "# `each.strip()` is the blank-line test: lines read from a file keep their\n",
    "# newline, so `bool(each)` is True even for an empty line. Rows are collected\n",
    "# in a list because growing a frame with `.loc` copies it on every insert.\n",
    "row_labels = []\n",
    "rows = []\n",
    "for idx, each in enumerate(data):\n",
    "    if each.strip():\n",
    "        row_labels.append(idx)  # keep the source line number as the row label\n",
    "        rows.append(parse_line(each))\n",
    "audiology_df = pd.DataFrame(rows, index=row_labels,\n",
    "                            columns=['case_id', 'classification', 'case_features'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:26:59.943481",
     "start_time": "2017-11-17T21:26:59.896842"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>case_id</th>\n",
       "      <th>classification</th>\n",
       "      <th>case_features</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>p1</td>\n",
       "      <td>cochlear_unknown</td>\n",
       "      <td>boneAbnormal,air(mild),ar_c(normal),ar_u(normal),o_ar_c(normal),o_ar_u(normal),speech(normal),static(normal),tymp(a)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>p2</td>\n",
       "      <td>cochlear_unknown</td>\n",
       "      <td>boneAbnormal,air(moderate),ar_c(normal),ar_u(normal),o_ar_c(normal),o_ar_u(normal),speech(normal),static(normal),tymp(a)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>p3</td>\n",
       "      <td>mixed_cochlear_age_fixation</td>\n",
       "      <td>age_gt_60,airBoneGap,boneAbnormal,air(mild),ar_u(absent),bone(mild),o_ar_u(absent),speech(normal),static(normal),tymp(as)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>p4</td>\n",
       "      <td>mixed_cochlear_age_otitis_media</td>\n",
       "      <td>age_gt_60,airBoneGap,air(mild),ar_u(absent),bone(mild),o_ar_u(absent),speech(normal),static(normal),tymp(b)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>p5</td>\n",
       "      <td>cochlear_age</td>\n",
       "      <td>age_gt_60,boneAbnormal,air(mild),ar_c(normal),ar_u(normal),bone(mild),o_ar_c(normal),o_ar_u(normal),speech(good),static(normal),tymp(a)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  case_id                   classification                                                                                                                            case_features\n",
       "0      p1                 cochlear_unknown                     boneAbnormal,air(mild),ar_c(normal),ar_u(normal),o_ar_c(normal),o_ar_u(normal),speech(normal),static(normal),tymp(a)\n",
       "1      p2                 cochlear_unknown                 boneAbnormal,air(moderate),ar_c(normal),ar_u(normal),o_ar_c(normal),o_ar_u(normal),speech(normal),static(normal),tymp(a)\n",
       "2      p3      mixed_cochlear_age_fixation                age_gt_60,airBoneGap,boneAbnormal,air(mild),ar_u(absent),bone(mild),o_ar_u(absent),speech(normal),static(normal),tymp(as)\n",
       "3      p4  mixed_cochlear_age_otitis_media                              age_gt_60,airBoneGap,air(mild),ar_u(absent),bone(mild),o_ar_u(absent),speech(normal),static(normal),tymp(b)\n",
       "4      p5                     cochlear_age  age_gt_60,boneAbnormal,air(mild),ar_c(normal),ar_u(normal),bone(mild),o_ar_c(normal),o_ar_u(normal),speech(good),static(normal),tymp(a)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five parsed records.\n",
    "audiology_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Looks like the case_features are all text labels/observations by doctors. Let's split them into features and make them boolean."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:27:00.252014",
     "start_time": "2017-11-17T21:26:59.946003"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                  case_id  case_features\n",
      "classification                                          \n",
      "acoustic_neuroma                        1              1\n",
      "bells_palsy                             1              1\n",
      "cochlear_age                           46             46\n",
      "cochlear_age_and_noise                 18             18\n",
      "cochlear_age_plus_poss_menieres         1              1\n",
      "cochlear_noise_and_heredity             2              2\n",
      "cochlear_poss_noise                    16             16\n",
      "cochlear_unknown                       48             48\n",
      "conductive_discontinuity                2              2\n",
      "conductive_fixation                     6              6\n",
      "mixed_cochlear_age_fixation             1              1\n",
      "mixed_cochlear_age_otitis_media         4              4\n",
      "mixed_cochlear_age_s_om                 2              2\n",
      "mixed_cochlear_unk_discontinuity        2              2\n",
      "mixed_cochlear_unk_fixation             5              5\n",
      "mixed_cochlear_unk_ser_om               3              3\n",
      "mixed_poss_central_om                   1              1\n",
      "mixed_poss_noise_om                     2              2\n",
      "normal_ear                             20             20\n",
      "otitis_media                            4              4\n",
      "poss_central                            1              1\n",
      "possible_brainstem_disorder             4              4\n",
      "possible_menieres                       8              8\n",
      "retrocochlear_unknown                   2              2\n"
     ]
    }
   ],
   "source": [
    "# Number of records per diagnosis class (non-null count of each other column).\n",
    "class_counts = audiology_df.groupby('classification').count()\n",
    "print(class_counts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:27:00.649407",
     "start_time": "2017-11-17T21:27:00.253804"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# One boolean column per observed feature. Membership is tested against the\n",
    "# exact comma-separated tokens of each record: a substring test on the joined\n",
    "# string would also flag `s_sn_gt_2k` for a record that only lists\n",
    "# `m_s_sn_gt_2k`. Features are added in sorted order so the column layout is\n",
    "# the same on every run (set iteration order is not stable across runs).\n",
    "feature_tokens = audiology_df['case_features'].apply(lambda joined: set(joined.split(',')))\n",
    "for ea in sorted(all_obs):\n",
    "    # `name=ea` binds the current feature explicitly instead of via closure.\n",
    "    audiology_df[ea] = feature_tokens.apply(lambda tokens, name=ea: name in tokens)\n",
    "# The raw string column is fully represented by the boolean columns now.\n",
    "audiology_df = audiology_df.drop('case_features', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:27:00.907440",
     "start_time": "2017-11-17T21:27:00.650898"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>case_id</th>\n",
       "      <th>classification</th>\n",
       "      <th>bone(normal)</th>\n",
       "      <th>history(fullness)</th>\n",
       "      <th>o_ar_c(elevated)</th>\n",
       "      <th>mod_s_sn_gt_500</th>\n",
       "      <th>mod_sn_gt_4k</th>\n",
       "      <th>notch_4k</th>\n",
       "      <th>late_wave(poor)</th>\n",
       "      <th>s_sn_gt_2k</th>\n",
       "      <th>...</th>\n",
       "      <th>air(normal)</th>\n",
       "      <th>tymp(b)</th>\n",
       "      <th>m_s_sn_gt_2k</th>\n",
       "      <th>o_ar_u(elevated)</th>\n",
       "      <th>age_gt_60</th>\n",
       "      <th>tymp(ad)</th>\n",
       "      <th>history(recruitment)</th>\n",
       "      <th>m_m_sn</th>\n",
       "      <th>m_sn_gt_1k</th>\n",
       "      <th>o_ar_u(normal)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>p1</td>\n",
       "      <td>cochlear_unknown</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>p2</td>\n",
       "      <td>cochlear_unknown</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>p3</td>\n",
       "      <td>mixed_cochlear_age_fixation</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>p4</td>\n",
       "      <td>mixed_cochlear_age_otitis_media</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>p5</td>\n",
       "      <td>cochlear_age</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 89 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  case_id                   classification  bone(normal)  history(fullness)  o_ar_c(elevated)  mod_s_sn_gt_500  mod_sn_gt_4k  notch_4k  late_wave(poor)  s_sn_gt_2k       ...        air(normal)  tymp(b)  m_s_sn_gt_2k  o_ar_u(elevated)  age_gt_60  tymp(ad)  history(recruitment)  m_m_sn  m_sn_gt_1k  o_ar_u(normal)\n",
       "0      p1                 cochlear_unknown         False              False             False            False         False     False            False       False       ...              False    False         False             False      False     False                 False   False       False            True\n",
       "1      p2                 cochlear_unknown         False              False             False            False         False     False            False       False       ...              False    False         False             False      False     False                 False   False       False            True\n",
       "2      p3      mixed_cochlear_age_fixation         False              False             False            False         False     False            False       False       ...              False    False         False             False       True     False                 False   False       False           False\n",
       "3      p4  mixed_cochlear_age_otitis_media         False              False             False            False         False     False            False       False       ...              False     True         False             False       True     False                 False   False       False           False\n",
       "4      p5                     cochlear_age         False              False             False            False         False     False            False       False       ...              False    False         False             False       True     False                 False   False       False            True\n",
       "\n",
       "[5 rows x 89 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Same records after expansion: one boolean column per observed feature.\n",
    "audiology_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Okay, based on the sample above, the only meaningful thing we can try is to predict the case classification from the observed features.\n",
    "\n",
    "## We have 87 features (I'm assuming these are labels that came out of human judgment), and most of them are False, i.e. the dataset is sparsely populated in these dimensions, and most likely the dimensions are not orthogonal to (i.e. independent of) each other.\n",
    "\n",
    "## For these reasons:\n",
    "* a tree-based prediction is best (since all the features are boolean)\n",
    "* XGBoost, since the features are mostly False/empty (i.e. sparse features)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:27:01.129284",
     "start_time": "2017-11-17T21:27:00.908902"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>case_id</th>\n",
       "      <th>classification</th>\n",
       "      <th>bone(normal)</th>\n",
       "      <th>history(fullness)</th>\n",
       "      <th>o_ar_c(elevated)</th>\n",
       "      <th>mod_s_sn_gt_500</th>\n",
       "      <th>mod_sn_gt_4k</th>\n",
       "      <th>notch_4k</th>\n",
       "      <th>late_wave(poor)</th>\n",
       "      <th>s_sn_gt_2k</th>\n",
       "      <th>...</th>\n",
       "      <th>air(normal)</th>\n",
       "      <th>tymp(b)</th>\n",
       "      <th>m_s_sn_gt_2k</th>\n",
       "      <th>o_ar_u(elevated)</th>\n",
       "      <th>age_gt_60</th>\n",
       "      <th>tymp(ad)</th>\n",
       "      <th>history(recruitment)</th>\n",
       "      <th>m_m_sn</th>\n",
       "      <th>m_sn_gt_1k</th>\n",
       "      <th>o_ar_u(normal)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>p1</td>\n",
       "      <td>cochlear_unknown</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>p2</td>\n",
       "      <td>cochlear_unknown</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>p3</td>\n",
       "      <td>mixed_cochlear_age_fixation</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>p4</td>\n",
       "      <td>mixed_cochlear_age_otitis_media</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>p5</td>\n",
       "      <td>cochlear_age</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 89 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  case_id                   classification  bone(normal)  history(fullness)  o_ar_c(elevated)  mod_s_sn_gt_500  mod_sn_gt_4k  notch_4k  late_wave(poor)  s_sn_gt_2k       ...        air(normal)  tymp(b)  m_s_sn_gt_2k  o_ar_u(elevated)  age_gt_60  tymp(ad)  history(recruitment)  m_m_sn  m_sn_gt_1k  o_ar_u(normal)\n",
       "0      p1                 cochlear_unknown         False              False             False            False         False     False            False       False       ...              False    False         False             False      False     False                 False   False       False            True\n",
       "1      p2                 cochlear_unknown         False              False             False            False         False     False            False       False       ...              False    False         False             False      False     False                 False   False       False            True\n",
       "2      p3      mixed_cochlear_age_fixation         False              False             False            False         False     False            False       False       ...              False    False         False             False       True     False                 False   False       False           False\n",
       "3      p4  mixed_cochlear_age_otitis_media         False              False             False            False         False     False            False       False       ...              False     True         False             False       True     False                 False   False       False           False\n",
       "4      p5                     cochlear_age         False              False             False            False         False     False            False       False       ...              False    False         False             False       True     False                 False   False       False            True\n",
       "\n",
       "[5 rows x 89 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "audiology_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:27:01.341072",
     "start_time": "2017-11-17T21:27:01.130712"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "\n",
    "le = LabelEncoder()\n",
    "le.fit(audiology_df['classification'].unique())\n",
    "audiology_df['classification'] = le.transform(audiology_df['classification'])\n",
    "target = audiology_df.classification\n",
    "\n",
    "audiology_df.drop(['case_id', 'classification'], 1, inplace=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:27:01.684735",
     "start_time": "2017-11-17T21:27:01.342669"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>bone(normal)</th>\n",
       "      <th>history(fullness)</th>\n",
       "      <th>o_ar_c(elevated)</th>\n",
       "      <th>mod_s_sn_gt_500</th>\n",
       "      <th>mod_sn_gt_4k</th>\n",
       "      <th>notch_4k</th>\n",
       "      <th>late_wave(poor)</th>\n",
       "      <th>s_sn_gt_2k</th>\n",
       "      <th>bone(moderate)</th>\n",
       "      <th>mod_gt_4k</th>\n",
       "      <th>...</th>\n",
       "      <th>air(normal)</th>\n",
       "      <th>tymp(b)</th>\n",
       "      <th>m_s_sn_gt_2k</th>\n",
       "      <th>o_ar_u(elevated)</th>\n",
       "      <th>age_gt_60</th>\n",
       "      <th>tymp(ad)</th>\n",
       "      <th>history(recruitment)</th>\n",
       "      <th>m_m_sn</th>\n",
       "      <th>m_sn_gt_1k</th>\n",
       "      <th>o_ar_u(normal)</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>...</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 87 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   bone(normal)  history(fullness)  o_ar_c(elevated)  mod_s_sn_gt_500  mod_sn_gt_4k  notch_4k  late_wave(poor)  s_sn_gt_2k  bone(moderate)  mod_gt_4k       ...        air(normal)  tymp(b)  m_s_sn_gt_2k  o_ar_u(elevated)  age_gt_60  tymp(ad)  history(recruitment)  m_m_sn  m_sn_gt_1k  o_ar_u(normal)\n",
       "0         False              False             False            False         False     False            False       False           False      False       ...              False    False         False             False      False     False                 False   False       False            True\n",
       "1         False              False             False            False         False     False            False       False           False      False       ...              False    False         False             False      False     False                 False   False       False            True\n",
       "2         False              False             False            False         False     False            False       False           False      False       ...              False    False         False             False       True     False                 False   False       False           False\n",
       "3         False              False             False            False         False     False            False       False           False      False       ...              False     True         False             False       True     False                 False   False       False           False\n",
       "4         False              False             False            False         False     False            False       False           False      False       ...              False    False         False             False       True     False                 False   False       False            True\n",
       "\n",
       "[5 rows x 87 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "audiology_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-11-17T15:56:59.135Z"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean squared error: 24.52\n",
      "Variance score: 0.72\n"
     ]
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(audiology_df, target, test_size=0.3)\n",
    "tree_model = pm.train(X_train, y_train, 'tree')\n",
    "tree_model.fit(X_train, y_train)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((tree_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % tree_model.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-11-17T15:56:59.145Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "plotter.show_tree_model(tree_model, model_type='tree')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-11-17T15:56:59.159Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Train the model using the training sets\n",
    "xgb_model = pm.train(X_train, y_train, 'xgboost')\n",
    "xgb_model.fit(X_train, y_train)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((xgb_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "start_time": "2017-11-17T15:56:59.168Z"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "plotter.show_tree_model(xgb_model, model_type='xgboost')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}