{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:59:10.316323",
     "start_time": "2017-11-17T21:59:07.658334"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <div class=\"bk-root\">\n",
       "        <a href=\"http://bokeh.pydata.org\" target=\"_blank\" class=\"bk-logo bk-logo-small bk-logo-notebook\"></a>\n",
       "        <span id=\"ba332f2d-cc92-4337-a4b3-67ed92fab092\">Loading BokehJS ...</span>\n",
       "    </div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/javascript": [
       "\n",
       "(function(global) {\n",
       "  function now() {\n",
       "    return new Date();\n",
       "  }\n",
       "\n",
       "  var force = \"1\";\n",
       "\n",
       "  if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force !== \"\") {\n",
       "    window._bokeh_onload_callbacks = [];\n",
       "    window._bokeh_is_loading = undefined;\n",
       "  }\n",
       "\n",
       "\n",
       "  \n",
       "  if (typeof (window._bokeh_timeout) === \"undefined\" || force !== \"\") {\n",
       "    window._bokeh_timeout = Date.now() + 5000;\n",
       "    window._bokeh_failed_load = false;\n",
       "  }\n",
       "\n",
       "  var NB_LOAD_WARNING = {'data': {'text/html':\n",
       "     \"<div style='background-color: #fdd'>\\n\"+\n",
       "     \"<p>\\n\"+\n",
       "     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
       "     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
       "     \"</p>\\n\"+\n",
       "     \"<ul>\\n\"+\n",
       "     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n",
       "     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n",
       "     \"</ul>\\n\"+\n",
       "     \"<code>\\n\"+\n",
       "     \"from bokeh.resources import INLINE\\n\"+\n",
       "     \"output_notebook(resources=INLINE)\\n\"+\n",
       "     \"</code>\\n\"+\n",
       "     \"</div>\"}};\n",
       "\n",
       "  function display_loaded() {\n",
       "    if (window.Bokeh !== undefined) {\n",
       "      Bokeh.$(\"#ba332f2d-cc92-4337-a4b3-67ed92fab092\").text(\"BokehJS successfully loaded.\");\n",
       "    } else if (Date.now() < window._bokeh_timeout) {\n",
       "      setTimeout(display_loaded, 100)\n",
       "    }\n",
       "  }\n",
       "\n",
       "  function run_callbacks() {\n",
       "    window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n",
       "    delete window._bokeh_onload_callbacks\n",
       "    console.info(\"Bokeh: all callbacks have finished\");\n",
       "  }\n",
       "\n",
       "  function load_libs(js_urls, callback) {\n",
       "    window._bokeh_onload_callbacks.push(callback);\n",
       "    if (window._bokeh_is_loading > 0) {\n",
       "      console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
       "      return null;\n",
       "    }\n",
       "    if (js_urls == null || js_urls.length === 0) {\n",
       "      run_callbacks();\n",
       "      return null;\n",
       "    }\n",
       "    console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
       "    window._bokeh_is_loading = js_urls.length;\n",
       "    for (var i = 0; i < js_urls.length; i++) {\n",
       "      var url = js_urls[i];\n",
       "      var s = document.createElement('script');\n",
       "      s.src = url;\n",
       "      s.async = false;\n",
       "      s.onreadystatechange = s.onload = function() {\n",
       "        window._bokeh_is_loading--;\n",
       "        if (window._bokeh_is_loading === 0) {\n",
       "          console.log(\"Bokeh: all BokehJS libraries loaded\");\n",
       "          run_callbacks()\n",
       "        }\n",
       "      };\n",
       "      s.onerror = function() {\n",
       "        console.warn(\"failed to load library \" + url);\n",
       "      };\n",
       "      console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
       "      document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "    }\n",
       "  };var element = document.getElementById(\"ba332f2d-cc92-4337-a4b3-67ed92fab092\");\n",
       "  if (element == null) {\n",
       "    console.log(\"Bokeh: ERROR: autoload.js configured with elementid 'ba332f2d-cc92-4337-a4b3-67ed92fab092' but no matching script tag was found. \")\n",
       "    return false;\n",
       "  }\n",
       "\n",
       "  var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.js'];\n",
       "\n",
       "  var inline_js = [\n",
       "    function(Bokeh) {\n",
       "      Bokeh.set_log_level(\"info\");\n",
       "    },\n",
       "    \n",
       "    function(Bokeh) {\n",
       "      \n",
       "      Bokeh.$(\"#ba332f2d-cc92-4337-a4b3-67ed92fab092\").text(\"BokehJS is loading...\");\n",
       "    },\n",
       "    function(Bokeh) {\n",
       "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n",
       "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n",
       "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n",
       "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n",
       "    }\n",
       "  ];\n",
       "\n",
       "  function run_inline_js() {\n",
       "    \n",
       "    if ((window.Bokeh !== undefined) || (force === \"1\")) {\n",
       "      for (var i = 0; i < inline_js.length; i++) {\n",
       "        inline_js[i](window.Bokeh);\n",
       "      }if (force === \"1\") {\n",
       "        display_loaded();\n",
       "      }} else if (Date.now() < window._bokeh_timeout) {\n",
       "      setTimeout(run_inline_js, 100);\n",
       "    } else if (!window._bokeh_failed_load) {\n",
       "      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
       "      window._bokeh_failed_load = true;\n",
       "    } else if (!force) {\n",
       "      var cell = $(\"#ba332f2d-cc92-4337-a4b3-67ed92fab092\").parents('.cell').data().cell;\n",
       "      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
       "    }\n",
       "\n",
       "  }\n",
       "\n",
       "  if (window._bokeh_is_loading === 0) {\n",
       "    console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
       "    run_inline_js();\n",
       "  } else {\n",
       "    load_libs(js_urls, function() {\n",
       "      console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n",
       "      run_inline_js();\n",
       "    });\n",
       "  }\n",
       "}(this));"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from sklearn import datasets\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import bokeh\n",
    "from bokeh.plotting import output_notebook\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from datascienceutils import analyze\n",
    "from datascienceutils import predictiveModels as pm\n",
    "from datascienceutils import sklearnUtils as sku\n",
    "\n",
    "output_notebook()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:59:24.442725",
     "start_time": "2017-11-17T21:59:24.115912"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "irisDf = pd.read_excel('/home/anand/DataScientist/data/titanic3.xls')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:59:37.389224",
     "start_time": "2017-11-17T21:59:37.352020"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pclass</th>\n",
       "      <th>survived</th>\n",
       "      <th>name</th>\n",
       "      <th>sex</th>\n",
       "      <th>age</th>\n",
       "      <th>sibsp</th>\n",
       "      <th>parch</th>\n",
       "      <th>ticket</th>\n",
       "      <th>fare</th>\n",
       "      <th>cabin</th>\n",
       "      <th>embarked</th>\n",
       "      <th>boat</th>\n",
       "      <th>body</th>\n",
       "      <th>home.dest</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Allen, Miss. Elisabeth Walton</td>\n",
       "      <td>female</td>\n",
       "      <td>29.0000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>24160</td>\n",
       "      <td>211.3375</td>\n",
       "      <td>B5</td>\n",
       "      <td>S</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>St Louis, MO</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Allison, Master. Hudson Trevor</td>\n",
       "      <td>male</td>\n",
       "      <td>0.9167</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>113781</td>\n",
       "      <td>151.5500</td>\n",
       "      <td>C22 C26</td>\n",
       "      <td>S</td>\n",
       "      <td>11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Montreal, PQ / Chesterville, ON</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Allison, Miss. Helen Loraine</td>\n",
       "      <td>female</td>\n",
       "      <td>2.0000</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>113781</td>\n",
       "      <td>151.5500</td>\n",
       "      <td>C22 C26</td>\n",
       "      <td>S</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Montreal, PQ / Chesterville, ON</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Allison, Mr. Hudson Joshua Creighton</td>\n",
       "      <td>male</td>\n",
       "      <td>30.0000</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>113781</td>\n",
       "      <td>151.5500</td>\n",
       "      <td>C22 C26</td>\n",
       "      <td>S</td>\n",
       "      <td>NaN</td>\n",
       "      <td>135.0</td>\n",
       "      <td>Montreal, PQ / Chesterville, ON</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>Allison, Mrs. Hudson J C (Bessie Waldo Daniels)</td>\n",
       "      <td>female</td>\n",
       "      <td>25.0000</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>113781</td>\n",
       "      <td>151.5500</td>\n",
       "      <td>C22 C26</td>\n",
       "      <td>S</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Montreal, PQ / Chesterville, ON</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pclass  survived                                             name     sex  \\\n",
       "0       1         1                    Allen, Miss. Elisabeth Walton  female   \n",
       "1       1         1                   Allison, Master. Hudson Trevor    male   \n",
       "2       1         0                     Allison, Miss. Helen Loraine  female   \n",
       "3       1         0             Allison, Mr. Hudson Joshua Creighton    male   \n",
       "4       1         0  Allison, Mrs. Hudson J C (Bessie Waldo Daniels)  female   \n",
       "\n",
       "       age  sibsp  parch  ticket      fare    cabin embarked boat   body  \\\n",
       "0  29.0000      0      0   24160  211.3375       B5        S    2    NaN   \n",
       "1   0.9167      1      2  113781  151.5500  C22 C26        S   11    NaN   \n",
       "2   2.0000      1      2  113781  151.5500  C22 C26        S  NaN    NaN   \n",
       "3  30.0000      1      2  113781  151.5500  C22 C26        S  NaN  135.0   \n",
       "4  25.0000      1      2  113781  151.5500  C22 C26        S  NaN    NaN   \n",
       "\n",
       "                         home.dest  \n",
       "0                     St Louis, MO  \n",
       "1  Montreal, PQ / Chesterville, ON  \n",
       "2  Montreal, PQ / Chesterville, ON  \n",
       "3  Montreal, PQ / Chesterville, ON  \n",
       "4  Montreal, PQ / Chesterville, ON  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "irisDf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:59:37.746676",
     "start_time": "2017-11-17T21:59:37.451522"
    },
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "target = irisDf.survived\n",
    "copy_df = irisDf.copy(deep=True)\n",
    "\n",
    "copy_df.drop('survived', 1, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T21:59:37.989922",
     "start_time": "2017-11-17T21:59:37.809833"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['B5', 'C22 C26', 'E12', 'D7', 'A36', 'C101', nan, 'C62 C64', 'B35',\n",
       "       'A23', 'B58 B60', 'D15', 'C6', 'D35', 'C148', 'C97', 'B49', 'C99',\n",
       "       'C52', 'T', 'A31', 'C7', 'C103', 'D22', 'E33', 'A21', 'B10', 'B4',\n",
       "       'E40', 'B38', 'E24', 'B51 B53 B55', 'B96 B98', 'C46', 'E31', 'E8',\n",
       "       'B61', 'B77', 'A9', 'C89', 'A14', 'E58', 'E49', 'E52', 'E45', 'B22',\n",
       "       'B26', 'C85', 'E17', 'B71', 'B20', 'A34', 'C86', 'A16', 'A20',\n",
       "       'A18', 'C54', 'C45', 'D20', 'A29', 'C95', 'E25', 'C111',\n",
       "       'C23 C25 C27', 'E36', 'D34', 'D40', 'B39', 'B41', 'B102', 'C123',\n",
       "       'E63', 'C130', 'B86', 'C92', 'A5', 'C51', 'B42', 'C91', 'C125',\n",
       "       'D10 D12', 'B82 B84', 'E50', 'D33', 'C83', 'B94', 'D49', 'D45',\n",
       "       'B69', 'B11', 'E46', 'C39', 'B18', 'D11', 'C93', 'B28', 'C49',\n",
       "       'B52 B54 B56', 'E60', 'C132', 'B37', 'D21', 'D19', 'C124', 'D17',\n",
       "       'B101', 'D28', 'D6', 'D9', 'B80', 'C106', 'B79', 'C47', 'D30',\n",
       "       'C90', 'E38', 'C78', 'C30', 'C118', 'D36', 'D48', 'D47', 'C105',\n",
       "       'B36', 'B30', 'D43', 'B24', 'C2', 'C65', 'B73', 'C104', 'C110',\n",
       "       'C50', 'B3', 'A24', 'A32', 'A11', 'A10', 'B57 B59 B63 B66', 'C28',\n",
       "       'E44', 'A26', 'A6', 'A7', 'C31', 'A19', 'B45', 'E34', 'B78', 'B50',\n",
       "       'C87', 'C116', 'C55 C57', 'D50', 'E68', 'E67', 'C126', 'C68', 'C70',\n",
       "       'C53', 'B19', 'D46', 'D37', 'D26', 'C32', 'C80', 'C82', 'C128',\n",
       "       'E39 E41', 'D', 'F4', 'D56', 'F33', 'E101', 'E77', 'F2', 'D38', 'F',\n",
       "       'F G63', 'F E57', 'F E46', 'F G73', 'E121', 'F E69', 'E10', 'G6',\n",
       "       'F38'], dtype=object)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "copy_df['cabin'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:08.793187",
     "start_time": "2017-11-17T22:00:08.787828"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1],\n",
       "       [0],\n",
       "       [0],\n",
       "       [1]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import preprocessing\n",
    "lb = preprocessing.LabelBinarizer()\n",
    "lb.fit_transform(['yes', 'no', 'no', 'yes'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:09.082945",
     "start_time": "2017-11-17T22:00:08.855832"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "## FEATURE ENGINEERING\n",
    "copy_df['sex'] = sku.encode_labels(copy_df, 'sex') # Encode gender to number\n",
    "copy_df['cabin'] = copy_df['cabin'].apply(lambda x: str(x))\n",
    "copy_df['cabin_class'] = copy_df['cabin'].apply(lambda x: [s for s in x.split() if not s.isdigit()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:03:50.721771",
     "start_time": "2017-11-17T22:03:50.686264"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pclass</th>\n",
       "      <th>name</th>\n",
       "      <th>sex</th>\n",
       "      <th>age</th>\n",
       "      <th>sibsp</th>\n",
       "      <th>parch</th>\n",
       "      <th>ticket</th>\n",
       "      <th>fare</th>\n",
       "      <th>cabin</th>\n",
       "      <th>embarked</th>\n",
       "      <th>boat</th>\n",
       "      <th>body</th>\n",
       "      <th>home.dest</th>\n",
       "      <th>cabin_class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Allen, Miss. Elisabeth Walton</td>\n",
       "      <td>0</td>\n",
       "      <td>29.0000</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>24160</td>\n",
       "      <td>211.3375</td>\n",
       "      <td>B5</td>\n",
       "      <td>S</td>\n",
       "      <td>2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>St Louis, MO</td>\n",
       "      <td>[B5]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Allison, Master. Hudson Trevor</td>\n",
       "      <td>1</td>\n",
       "      <td>0.9167</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>113781</td>\n",
       "      <td>151.5500</td>\n",
       "      <td>C22 C26</td>\n",
       "      <td>S</td>\n",
       "      <td>11</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Montreal, PQ / Chesterville, ON</td>\n",
       "      <td>[C22, C26]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pclass                            name  sex      age  sibsp  parch  ticket  \\\n",
       "0       1   Allen, Miss. Elisabeth Walton    0  29.0000      0      0   24160   \n",
       "1       1  Allison, Master. Hudson Trevor    1   0.9167      1      2  113781   \n",
       "\n",
       "       fare    cabin embarked boat  body                        home.dest  \\\n",
       "0  211.3375       B5        S    2   NaN                     St Louis, MO   \n",
       "1  151.5500  C22 C26        S   11   NaN  Montreal, PQ / Chesterville, ON   \n",
       "\n",
       "  cabin_class  \n",
       "0        [B5]  \n",
       "1  [C22, C26]  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "copy_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:09.897885",
     "start_time": "2017-11-17T22:00:09.145365"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "setting an array element with a sequence.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-9-30d4bda23342>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_test\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcopy_df\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[1;31m# Train the model using the training sets\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mlin_model\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'LinearRegression'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Coefficients: \\n'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlin_model\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/datascienceutils-1.2.19-py3.5.egg/datascienceutils/predictiveModels.py\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(dataframe, target, modelType, column, **kwargs)\u001b[0m\n\u001b[0;32m     32\u001b[0m         \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     33\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 34\u001b[1;33m         \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     35\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     36\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m    510\u001b[0m         \u001b[0mn_jobs_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    511\u001b[0m         X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[1;32m--> 512\u001b[1;33m                          y_numeric=True, multi_output=True)\n\u001b[0m\u001b[0;32m    513\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    514\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    519\u001b[0m     X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,\n\u001b[0;32m    520\u001b[0m                     \u001b[0mensure_2d\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mallow_nd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mensure_min_samples\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 521\u001b[1;33m                     ensure_min_features, warn_on_dtype, estimator)\n\u001b[0m\u001b[0;32m    522\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    523\u001b[0m         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    400\u001b[0m         \u001b[1;31m# make sure we actually converted to numeric:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    401\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mdtype_numeric\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"O\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 402\u001b[1;33m             \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    403\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mallow_nd\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>=\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    404\u001b[0m             raise ValueError(\"Found array with dim %d. %s expected <= 2.\"\n",
      "\u001b[1;31mValueError\u001b[0m: setting an array element with a sequence."
     ]
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(copy_df, target)\n",
    "# Train the model using the training sets\n",
    "lin_model = pm.train(X_train, y_train, 'LinearRegression')\n",
    "\n",
    "print('Coefficients: \\n', lin_model.coef_)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((lin_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % lin_model.score(X_test, y_test))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:01:47.908691",
     "start_time": "2017-11-17T22:01:47.677245"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "setting an array element with a sequence.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-11-a6973d76ea01>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m# Train the model using the training sets\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mlog_model\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'LogisticRegression'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[1;31m#print('Coefficients: \\n', log_model.coef_)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[1;31m# The mean squared error\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/datascienceutils-1.2.19-py3.5.egg/datascienceutils/predictiveModels.py\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(dataframe, target, modelType, column, **kwargs)\u001b[0m\n\u001b[0;32m     32\u001b[0m         \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     33\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 34\u001b[1;33m         \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     35\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     36\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/linear_model/logistic.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m   1171\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1172\u001b[0m         X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,\n\u001b[1;32m-> 1173\u001b[1;33m                          order=\"C\")\n\u001b[0m\u001b[0;32m   1174\u001b[0m         \u001b[0mcheck_classification_targets\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m   1175\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclasses_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munique\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    519\u001b[0m     X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,\n\u001b[0;32m    520\u001b[0m                     \u001b[0mensure_2d\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mallow_nd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mensure_min_samples\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 521\u001b[1;33m                     ensure_min_features, warn_on_dtype, estimator)\n\u001b[0m\u001b[0;32m    522\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    523\u001b[0m         y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    380\u001b[0m                                       force_all_finite)\n\u001b[0;32m    381\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 382\u001b[1;33m         \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    383\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    384\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mValueError\u001b[0m: setting an array element with a sequence."
     ]
    }
   ],
   "source": [
    "# Train the model using the training sets\n",
    "log_model = pm.train(X_train, y_train, 'LogisticRegression')\n",
    "\n",
    "#print('Coefficients: \\n', log_model.coef_)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((log_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % log_model.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:01:55.720074",
     "start_time": "2017-11-17T22:01:55.674078"
    },
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "setting an array element with a sequence.",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-12-7851e5fc266c>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;31m# Train the model using the training sets\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mrf_model\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'randomForest'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m \u001b[1;31m#print('Coefficients: \\n', rf_model.coef_)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m \u001b[1;31m# The mean squared error\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/datascienceutils-1.2.19-py3.5.egg/datascienceutils/predictiveModels.py\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(dataframe, target, modelType, column, **kwargs)\u001b[0m\n\u001b[0;32m     32\u001b[0m         \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     33\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 34\u001b[1;33m         \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     35\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     36\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/ensemble/forest.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m    245\u001b[0m         \"\"\"\n\u001b[0;32m    246\u001b[0m         \u001b[1;31m# Validate or convert input data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 247\u001b[1;33m         \u001b[0mX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"csc\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mDTYPE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    248\u001b[0m         \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'csc'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    249\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0missparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m    380\u001b[0m                                       force_all_finite)\n\u001b[0;32m    381\u001b[0m     \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 382\u001b[1;33m         \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    383\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    384\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mValueError\u001b[0m: setting an array element with a sequence."
     ]
    }
   ],
   "source": [
    "# Train the model using the training sets\n",
    "rf_model = pm.train(X_train, y_train, 'randomForest')\n",
    "\n",
    "#print('Coefficients: \\n', rf_model.coef_)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((rf_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % rf_model.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:09.901290",
     "start_time": "2017-11-17T16:30:08.798Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Train the model using the training sets\n",
    "sgd_model = pm.train(X_train, y_train, 'sgd')\n",
    "sgd_model.fit(X_train, y_train)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((sgd_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % sgd_model.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:09.901893",
     "start_time": "2017-11-17T16:30:08.809Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Train the model using the training sets\n",
    "xgb_model = pm.train(X_train, y_train, 'xgboost')\n",
    "xgb_model.fit(X_train, y_train)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((xgb_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % xgb_model.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:09.902492",
     "start_time": "2017-11-17T16:30:08.823Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Train the model using the training sets\n",
    "svm_model = pm.train(X_train, y_train, 'svm')\n",
    "svm_model.fit(X_train, y_train)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((svm_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % svm_model.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:09.903094",
     "start_time": "2017-11-17T16:30:08.839Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Train the model using the training sets\n",
    "bnb_model = pm.train(X_train, y_train, 'bernoulliNB')\n",
    "bnb_model.fit(X_train, y_train)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((bnb_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % bnb_model.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:09.903706",
     "start_time": "2017-11-17T16:30:08.851Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Train the model using the training sets\n",
    "knn_model = pm.train(X_train, y_train, 'knn')\n",
    "knn_model.fit(X_train, y_train)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((knn_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % knn_model.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:09.904302",
     "start_time": "2017-11-17T16:30:08.864Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Train the model using the training sets\n",
    "kde_model = pm.train(X_train, y_train, 'kde')\n",
    "kde_model.fit(X_train, y_train)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((kde_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % kde_model.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:09.904893",
     "start_time": "2017-11-17T16:30:08.874Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Train the model using the training sets\n",
    "kde_model = pm.train(X_train, y_train, 'kde')\n",
    "kde_model.fit(X_train, y_train)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((kde_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % kde_model.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-11-17T22:00:09.905487",
     "start_time": "2017-11-17T16:30:08.885Z"
    },
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Train the model using the training sets\n",
    "mnb_model = pm.train(X_train, y_train, 'multinomialNB')\n",
    "\n",
    "print('Coefficients: \\n', mnb_model.coef_)\n",
    "# The mean squared error\n",
    "print(\"Mean squared error: %.2f\"\n",
    "      % np.mean((mnb_model.predict(X_test) - y_test) ** 2))\n",
    "# Explained variance score: 1 is perfect prediction\n",
    "print('Variance score: %.2f' % mnb_model.score(X_test, y_test))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}