{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T21:59:10.316323", "start_time": "2017-11-17T21:59:07.658334" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " Loading BokehJS ...\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(global) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = \"1\";\n", "\n", " if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force !== \"\") {\n", " window._bokeh_onload_callbacks = [];\n", " window._bokeh_is_loading = undefined;\n", " }\n", "\n", "\n", " \n", " if (typeof (window._bokeh_timeout) === \"undefined\" || force !== \"\") {\n", " window._bokeh_timeout = Date.now() + 5000;\n", " window._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"

\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"

\\n\"+\n", " \"\\n\"+\n", " \"\\n\"+\n", " \"from bokeh.resources import INLINE\\n\"+\n", " \"output_notebook(resources=INLINE)\\n\"+\n", " \"\\n\"+\n", " \"
\"}};\n", "\n", " function display_loaded() {\n", " if (window.Bokeh !== undefined) {\n", " Bokeh.$(\"#ba332f2d-cc92-4337-a4b3-67ed92fab092\").text(\"BokehJS successfully loaded.\");\n", " } else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(display_loaded, 100)\n", " }\n", " }\n", "\n", " function run_callbacks() {\n", " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", " delete window._bokeh_onload_callbacks\n", " console.info(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(js_urls, callback) {\n", " window._bokeh_onload_callbacks.push(callback);\n", " if (window._bokeh_is_loading > 0) {\n", " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " window._bokeh_is_loading = js_urls.length;\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " var s = document.createElement('script');\n", " s.src = url;\n", " s.async = false;\n", " s.onreadystatechange = s.onload = function() {\n", " window._bokeh_is_loading--;\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", " run_callbacks()\n", " }\n", " };\n", " s.onerror = function() {\n", " console.warn(\"failed to load library \" + url);\n", " };\n", " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", " }\n", " };var element = document.getElementById(\"ba332f2d-cc92-4337-a4b3-67ed92fab092\");\n", " if (element == null) {\n", " console.log(\"Bokeh: ERROR: autoload.js configured with elementid 'ba332f2d-cc92-4337-a4b3-67ed92fab092' but no matching script tag was found. 
\")\n", " return false;\n", " }\n", "\n", " var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.js'];\n", "\n", " var inline_js = [\n", " function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", " \n", " function(Bokeh) {\n", " \n", " Bokeh.$(\"#ba332f2d-cc92-4337-a4b3-67ed92fab092\").text(\"BokehJS is loading...\");\n", " },\n", " function(Bokeh) {\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " \n", " if ((window.Bokeh !== undefined) || (force === \"1\")) {\n", " for (var i = 0; i < inline_js.length; i++) {\n", " inline_js[i](window.Bokeh);\n", " }if (force === \"1\") {\n", " display_loaded();\n", " }} else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!window._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " window._bokeh_failed_load = true;\n", " } else if (!force) {\n", " var cell = $(\"#ba332f2d-cc92-4337-a4b3-67ed92fab092\").parents('.cell').data().cell;\n", " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", " }\n", "\n", " }\n", "\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(js_urls, function() {\n", " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(this));" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from sklearn 
import datasets\n", "import os\n", "import numpy as np\n", "import pandas as pd\n", "import bokeh\n", "from bokeh.plotting import output_notebook\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "from datascienceutils import analyze\n", "from datascienceutils import predictiveModels as pm\n", "from datascienceutils import sklearnUtils as sku\n", "\n", "output_notebook()\n" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T21:59:24.442725", "start_time": "2017-11-17T21:59:24.115912" }, "collapsed": true }, "outputs": [], "source": [ "# Location of the Titanic passenger workbook (titanic3.xls).\n", "# Set the TITANIC_XLS environment variable to point at another copy;\n", "# the default is the path this notebook was originally run with.\n", "TITANIC_XLS = os.environ.get('TITANIC_XLS', '/home/anand/DataScientist/data/titanic3.xls')\n", "\n", "# NOTE: despite its name, irisDf holds the Titanic data. The name is kept\n", "# because the following cells refer to it.\n", "irisDf = pd.read_excel(TITANIC_XLS)" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T21:59:37.389224", "start_time": "2017-11-17T21:59:37.352020" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclasssurvivednamesexagesibspparchticketfarecabinembarkedboatbodyhome.dest
011Allen, Miss. Elisabeth Waltonfemale29.00000024160211.3375B5S2NaNSt Louis, MO
111Allison, Master. Hudson Trevormale0.916712113781151.5500C22 C26S11NaNMontreal, PQ / Chesterville, ON
210Allison, Miss. Helen Lorainefemale2.000012113781151.5500C22 C26SNaNNaNMontreal, PQ / Chesterville, ON
310Allison, Mr. Hudson Joshua Creightonmale30.000012113781151.5500C22 C26SNaN135.0Montreal, PQ / Chesterville, ON
410Allison, Mrs. Hudson J C (Bessie Waldo Daniels)female25.000012113781151.5500C22 C26SNaNNaNMontreal, PQ / Chesterville, ON
\n", "
" ], "text/plain": [ " pclass survived name sex \\\n", "0 1 1 Allen, Miss. Elisabeth Walton female \n", "1 1 1 Allison, Master. Hudson Trevor male \n", "2 1 0 Allison, Miss. Helen Loraine female \n", "3 1 0 Allison, Mr. Hudson Joshua Creighton male \n", "4 1 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) female \n", "\n", " age sibsp parch ticket fare cabin embarked boat body \\\n", "0 29.0000 0 0 24160 211.3375 B5 S 2 NaN \n", "1 0.9167 1 2 113781 151.5500 C22 C26 S 11 NaN \n", "2 2.0000 1 2 113781 151.5500 C22 C26 S NaN NaN \n", "3 30.0000 1 2 113781 151.5500 C22 C26 S NaN 135.0 \n", "4 25.0000 1 2 113781 151.5500 C22 C26 S NaN NaN \n", "\n", " home.dest \n", "0 St Louis, MO \n", "1 Montreal, PQ / Chesterville, ON \n", "2 Montreal, PQ / Chesterville, ON \n", "3 Montreal, PQ / Chesterville, ON \n", "4 Montreal, PQ / Chesterville, ON " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "irisDf.head()" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T21:59:37.746676", "start_time": "2017-11-17T21:59:37.451522" }, "collapsed": true }, "outputs": [], "source": [ "# Label column: the 'survived' flag of each passenger.\n", "target = irisDf.survived\n", "\n", "# Feature frame: every column except the label. drop() returns a new frame,\n", "# so irisDf itself keeps 'survived' and this cell can be re-run safely.\n", "# axis is passed by keyword because pandas >= 2.0 rejects the positional form.\n", "copy_df = irisDf.drop('survived', axis=1)" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T21:59:37.989922", "start_time": "2017-11-17T21:59:37.809833" }, "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array(['B5', 'C22 C26', 'E12', 'D7', 'A36', 'C101', nan, 'C62 C64', 'B35',\n", " 'A23', 'B58 B60', 'D15', 'C6', 'D35', 'C148', 'C97', 'B49', 'C99',\n", " 'C52', 'T', 'A31', 'C7', 'C103', 'D22', 'E33', 'A21', 'B10', 'B4',\n", " 'E40', 'B38', 'E24', 'B51 B53 B55', 'B96 B98', 'C46', 'E31', 'E8',\n", " 'B61', 'B77', 'A9', 'C89', 'A14', 'E58', 'E49', 'E52', 'E45', 'B22',\n", " 'B26', 'C85', 'E17', 'B71', 'B20', 'A34', 'C86', 'A16', 'A20',\n", " 'A18', 'C54', 'C45', 'D20', 
'A29', 'C95', 'E25', 'C111',\n", " 'C23 C25 C27', 'E36', 'D34', 'D40', 'B39', 'B41', 'B102', 'C123',\n", " 'E63', 'C130', 'B86', 'C92', 'A5', 'C51', 'B42', 'C91', 'C125',\n", " 'D10 D12', 'B82 B84', 'E50', 'D33', 'C83', 'B94', 'D49', 'D45',\n", " 'B69', 'B11', 'E46', 'C39', 'B18', 'D11', 'C93', 'B28', 'C49',\n", " 'B52 B54 B56', 'E60', 'C132', 'B37', 'D21', 'D19', 'C124', 'D17',\n", " 'B101', 'D28', 'D6', 'D9', 'B80', 'C106', 'B79', 'C47', 'D30',\n", " 'C90', 'E38', 'C78', 'C30', 'C118', 'D36', 'D48', 'D47', 'C105',\n", " 'B36', 'B30', 'D43', 'B24', 'C2', 'C65', 'B73', 'C104', 'C110',\n", " 'C50', 'B3', 'A24', 'A32', 'A11', 'A10', 'B57 B59 B63 B66', 'C28',\n", " 'E44', 'A26', 'A6', 'A7', 'C31', 'A19', 'B45', 'E34', 'B78', 'B50',\n", " 'C87', 'C116', 'C55 C57', 'D50', 'E68', 'E67', 'C126', 'C68', 'C70',\n", " 'C53', 'B19', 'D46', 'D37', 'D26', 'C32', 'C80', 'C82', 'C128',\n", " 'E39 E41', 'D', 'F4', 'D56', 'F33', 'E101', 'E77', 'F2', 'D38', 'F',\n", " 'F G63', 'F E57', 'F E46', 'F G73', 'E121', 'F E69', 'E10', 'G6',\n", " 'F38'], dtype=object)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "copy_df['cabin'].unique()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:00:08.793187", "start_time": "2017-11-17T22:00:08.787828" }, "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([[1],\n", " [0],\n", " [0],\n", " [1]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn import preprocessing\n", "lb = preprocessing.LabelBinarizer()\n", "lb.fit_transform(['yes', 'no', 'no', 'yes'])" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:00:09.082945", "start_time": "2017-11-17T22:00:08.855832" }, "collapsed": false }, "outputs": [], "source": [ "## FEATURE ENGINEERING\n", "copy_df['sex'] = sku.encode_labels(copy_df, 'sex') # Encode gender to 
number\n", "# Missing cabins become the literal string 'nan' after str().\n", "copy_df['cabin'] = copy_df['cabin'].apply(lambda x: str(x))\n", "# cabin_class is the deck letter of the first listed cabin ('C22 C26' -> 'C'),\n", "# with 'U' standing in for an unknown cabin. It must be one scalar per row:\n", "# the earlier token filter (not s.isdigit()) never removed anything, so the\n", "# column held a Python list per passenger, which cannot be placed in a\n", "# numeric feature matrix.\n", "copy_df['cabin_class'] = copy_df['cabin'].apply(lambda x: 'U' if x == 'nan' else x[:1])" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:03:50.721771", "start_time": "2017-11-17T22:03:50.686264" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclassnamesexagesibspparchticketfarecabinembarkedboatbodyhome.destcabin_class
01Allen, Miss. Elisabeth Walton029.00000024160211.3375B5S2NaNSt Louis, MO[B5]
11Allison, Master. Hudson Trevor10.916712113781151.5500C22 C26S11NaNMontreal, PQ / Chesterville, ON[C22, C26]
\n", "
" ], "text/plain": [ " pclass name sex age sibsp parch ticket \\\n", "0 1 Allen, Miss. Elisabeth Walton 0 29.0000 0 0 24160 \n", "1 1 Allison, Master. Hudson Trevor 1 0.9167 1 2 113781 \n", "\n", " fare cabin embarked boat body home.dest \\\n", "0 211.3375 B5 S 2 NaN St Louis, MO \n", "1 151.5500 C22 C26 S 11 NaN Montreal, PQ / Chesterville, ON \n", "\n", " cabin_class \n", "0 [B5] \n", "1 [C22, C26] " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "copy_df.head(2)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:00:09.897885", "start_time": "2017-11-17T22:00:09.145365" }, "collapsed": false }, "outputs": [ { "ename": "ValueError", "evalue": "setting an array element with a sequence.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_test\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_test_split\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcopy_df\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[1;31m# Train the model using the training sets\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 3\u001b[1;33m \u001b[0mlin_model\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'LinearRegression'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4\u001b[0m 
\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'Coefficients: \\n'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlin_model\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcoef_\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/datascienceutils-1.2.19-py3.5.egg/datascienceutils/predictiveModels.py\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(dataframe, target, modelType, column, **kwargs)\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 34\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 35\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 36\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[0mn_jobs_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 511\u001b[0m X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[1;32m--> 512\u001b[1;33m y_numeric=True, multi_output=True)\n\u001b[0m\u001b[0;32m 513\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 514\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[1;32mis\u001b[0m 
\u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m 519\u001b[0m X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,\n\u001b[0;32m 520\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mallow_nd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mensure_min_samples\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 521\u001b[1;33m ensure_min_features, warn_on_dtype, estimator)\n\u001b[0m\u001b[0;32m 522\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 523\u001b[0m y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n", "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m 400\u001b[0m \u001b[1;31m# make sure we actually converted to numeric:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 401\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdtype_numeric\u001b[0m \u001b[1;32mand\u001b[0m 
\u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mkind\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m\"O\"\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 402\u001b[1;33m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfloat64\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 403\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mallow_nd\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>=\u001b[0m \u001b[1;36m3\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 404\u001b[0m raise ValueError(\"Found array with dim %d. %s expected <= 2.\"\n", "\u001b[1;31mValueError\u001b[0m: setting an array element with a sequence." ] } ], "source": [ "X_train, X_test, y_train, y_test = train_test_split(copy_df, target)\n", "# Train the model using the training sets\n", "lin_model = pm.train(X_train, y_train, 'LinearRegression')\n", "\n", "print('Coefficients: \\n', lin_model.coef_)\n", "# The mean squared error\n", "print(\"Mean squared error: %.2f\"\n", " % np.mean((lin_model.predict(X_test) - y_test) ** 2))\n", "# Explained variance score: 1 is perfect prediction\n", "print('Variance score: %.2f' % lin_model.score(X_test, y_test))\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:01:47.908691", "start_time": "2017-11-17T22:01:47.677245" }, "collapsed": false }, "outputs": [ { "ename": "ValueError", "evalue": "setting an array element with a sequence.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 
"\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# Train the model using the training sets\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mlog_model\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'LogisticRegression'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;31m#print('Coefficients: \\n', log_model.coef_)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# The mean squared error\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/datascienceutils-1.2.19-py3.5.egg/datascienceutils/predictiveModels.py\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(dataframe, target, modelType, column, **kwargs)\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 34\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 35\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 36\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/linear_model/logistic.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m 1171\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1172\u001b[0m X, y = check_X_y(X, y, accept_sparse='csr', dtype=np.float64,\n\u001b[1;32m-> 1173\u001b[1;33m order=\"C\")\n\u001b[0m\u001b[0;32m 1174\u001b[0m \u001b[0mcheck_classification_targets\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1175\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mclasses_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munique\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m 519\u001b[0m X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,\n\u001b[0;32m 520\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mallow_nd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mensure_min_samples\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 521\u001b[1;33m ensure_min_features, warn_on_dtype, estimator)\n\u001b[0m\u001b[0;32m 522\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 523\u001b[0m y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n", "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, dtype, 
order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m 380\u001b[0m force_all_finite)\n\u001b[0;32m 381\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 382\u001b[1;33m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 383\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 384\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mValueError\u001b[0m: setting an array element with a sequence." 
] } ], "source": [ "# Train the model using the training sets\n", "log_model = pm.train(X_train, y_train, 'LogisticRegression')\n", "\n", "#print('Coefficients: \\n', log_model.coef_)\n", "# The mean squared error\n", "print(\"Mean squared error: %.2f\"\n", " % np.mean((log_model.predict(X_test) - y_test) ** 2))\n", "# Explained variance score: 1 is perfect prediction\n", "print('Variance score: %.2f' % log_model.score(X_test, y_test))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:01:55.720074", "start_time": "2017-11-17T22:01:55.674078" }, "collapsed": false }, "outputs": [ { "ename": "ValueError", "evalue": "setting an array element with a sequence.", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m# Train the model using the training sets\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mrf_model\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'randomForest'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;31m#print('Coefficients: \\n', rf_model.coef_)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# The mean squared error\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/datascienceutils-1.2.19-py3.5.egg/datascienceutils/predictiveModels.py\u001b[0m in 
\u001b[0;36mtrain\u001b[1;34m(dataframe, target, modelType, column, **kwargs)\u001b[0m\n\u001b[0;32m 32\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 34\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 35\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 36\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/ensemble/forest.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m 245\u001b[0m \"\"\"\n\u001b[0;32m 246\u001b[0m \u001b[1;31m# Validate or convert input data\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 247\u001b[1;33m \u001b[0mX\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"csc\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mDTYPE\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 248\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'csc'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 249\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0missparse\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m 380\u001b[0m force_all_finite)\n\u001b[0;32m 381\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 382\u001b[1;33m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 383\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 384\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mValueError\u001b[0m: setting an array element with a sequence." 
] } ], "source": [ "# Train the model using the training sets\n", "rf_model = pm.train(X_train, y_train, 'randomForest')\n", "\n", "#print('Coefficients: \\n', rf_model.coef_)\n", "# The mean squared error\n", "print(\"Mean squared error: %.2f\"\n", " % np.mean((rf_model.predict(X_test) - y_test) ** 2))\n", "# Explained variance score: 1 is perfect prediction\n", "print('Variance score: %.2f' % rf_model.score(X_test, y_test))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def report_scores(model, X_eval, y_eval):\n", "    \"\"\"Print the mean squared error and ``model.score`` of a fitted model.\n", "\n", "    model  -- fitted estimator exposing predict() and score()\n", "    X_eval -- held-out feature rows\n", "    y_eval -- true targets aligned with X_eval\n", "    \"\"\"\n", "    squared_errors = (model.predict(X_eval) - y_eval) ** 2\n", "    print(\"Mean squared error: %.2f\" % np.mean(squared_errors))\n", "    # score() is whatever metric the estimator itself defines; the label\n", "    # text is the one the per-model cells have always printed.\n", "    print('Variance score: %.2f' % model.score(X_eval, y_eval))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:00:09.901290", "start_time": "2017-11-17T16:30:08.798Z" }, "collapsed": false }, "outputs": [], "source": [ "# pm.train fits the model before returning it, so no second fit() is needed.\n", "sgd_model = pm.train(X_train, y_train, 'sgd')\n", "report_scores(sgd_model, X_test, y_test)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:00:09.901893", "start_time": "2017-11-17T16:30:08.809Z" }, "collapsed": false }, "outputs": [], "source": [ "# pm.train fits the model before returning it, so no second fit() is needed.\n", "xgb_model = pm.train(X_train, y_train, 'xgboost')\n", "report_scores(xgb_model, X_test, y_test)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:00:09.902492", "start_time": "2017-11-17T16:30:08.823Z" }, "collapsed": false }, "outputs": [], "source": [ "# pm.train fits the model before returning it, so no second fit() is needed.\n", "svm_model = pm.train(X_train, y_train, 'svm')\n", "report_scores(svm_model, X_test, y_test)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:00:09.903094", "start_time": "2017-11-17T16:30:08.839Z" }, "collapsed": false }, "outputs": [], "source": [ "# pm.train fits the model before returning it, so no second fit() is needed.\n", "bnb_model = pm.train(X_train, y_train, 'bernoulliNB')\n", "report_scores(bnb_model, X_test, y_test)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:00:09.903706", "start_time": "2017-11-17T16:30:08.851Z" }, "collapsed": false }, "outputs": [], "source": [ "# pm.train fits the model before returning it, so no second fit() is needed.\n", "knn_model = pm.train(X_train, y_train, 'knn')\n", "report_scores(knn_model, X_test, y_test)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:00:09.904302", "start_time": "2017-11-17T16:30:08.864Z" }, "collapsed": false }, "outputs": [], "source": [ "# pm.train fits the model before returning it, so no second fit() is needed.\n", "# (A verbatim duplicate of this cell used to follow; it has been removed.)\n", "kde_model = pm.train(X_train, y_train, 'kde')\n", "report_scores(kde_model, X_test, y_test)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "end_time": "2017-11-17T22:00:09.905487", "start_time": "2017-11-17T16:30:08.885Z" }, "collapsed": false }, "outputs": [], "source": [ "# Train the model using the training sets\n", "mnb_model = pm.train(X_train, y_train, 'multinomialNB')\n", "\n", "print('Coefficients: \\n', mnb_model.coef_)\n", "report_scores(mnb_model, X_test, y_test)" ] } ],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 0 }