{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2017-11-17T21:57:31.109861",
"start_time": "2017-11-17T21:57:31.095000"
},
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
"
\n",
"
Loading BokehJS ...\n",
"
"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/javascript": [
"\n",
"(function(global) {\n",
" function now() {\n",
" return new Date();\n",
" }\n",
"\n",
" var force = \"1\";\n",
"\n",
" if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force !== \"\") {\n",
" window._bokeh_onload_callbacks = [];\n",
" window._bokeh_is_loading = undefined;\n",
" }\n",
"\n",
"\n",
" \n",
" if (typeof (window._bokeh_timeout) === \"undefined\" || force !== \"\") {\n",
" window._bokeh_timeout = Date.now() + 5000;\n",
" window._bokeh_failed_load = false;\n",
" }\n",
"\n",
" var NB_LOAD_WARNING = {'data': {'text/html':\n",
" \"\\n\"+\n",
" \"
\\n\"+\n",
" \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
" \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
" \"
\\n\"+\n",
" \"
\\n\"+\n",
" \"- re-rerun `output_notebook()` to attempt to load from CDN again, or
\\n\"+\n",
" \"- use INLINE resources instead, as so:
\\n\"+\n",
" \"
\\n\"+\n",
" \"
\\n\"+\n",
" \"from bokeh.resources import INLINE\\n\"+\n",
" \"output_notebook(resources=INLINE)\\n\"+\n",
" \"
\\n\"+\n",
" \"
\"}};\n",
"\n",
" function display_loaded() {\n",
" if (window.Bokeh !== undefined) {\n",
" Bokeh.$(\"#c6289e64-189d-4236-a670-421feb29b434\").text(\"BokehJS successfully loaded.\");\n",
" } else if (Date.now() < window._bokeh_timeout) {\n",
" setTimeout(display_loaded, 100)\n",
" }\n",
" }\n",
"\n",
" function run_callbacks() {\n",
" window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n",
" delete window._bokeh_onload_callbacks\n",
" console.info(\"Bokeh: all callbacks have finished\");\n",
" }\n",
"\n",
" function load_libs(js_urls, callback) {\n",
" window._bokeh_onload_callbacks.push(callback);\n",
" if (window._bokeh_is_loading > 0) {\n",
" console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
" return null;\n",
" }\n",
" if (js_urls == null || js_urls.length === 0) {\n",
" run_callbacks();\n",
" return null;\n",
" }\n",
" console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
" window._bokeh_is_loading = js_urls.length;\n",
" for (var i = 0; i < js_urls.length; i++) {\n",
" var url = js_urls[i];\n",
" var s = document.createElement('script');\n",
" s.src = url;\n",
" s.async = false;\n",
" s.onreadystatechange = s.onload = function() {\n",
" window._bokeh_is_loading--;\n",
" if (window._bokeh_is_loading === 0) {\n",
" console.log(\"Bokeh: all BokehJS libraries loaded\");\n",
" run_callbacks()\n",
" }\n",
" };\n",
" s.onerror = function() {\n",
" console.warn(\"failed to load library \" + url);\n",
" };\n",
" console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
" document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
" }\n",
" };var element = document.getElementById(\"c6289e64-189d-4236-a670-421feb29b434\");\n",
" if (element == null) {\n",
" console.log(\"Bokeh: ERROR: autoload.js configured with elementid 'c6289e64-189d-4236-a670-421feb29b434' but no matching script tag was found. \")\n",
" return false;\n",
" }\n",
"\n",
" var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.js'];\n",
"\n",
" var inline_js = [\n",
" function(Bokeh) {\n",
" Bokeh.set_log_level(\"info\");\n",
" },\n",
" \n",
" function(Bokeh) {\n",
" \n",
" Bokeh.$(\"#c6289e64-189d-4236-a670-421feb29b434\").text(\"BokehJS is loading...\");\n",
" },\n",
" function(Bokeh) {\n",
" console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n",
" Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n",
" console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n",
" Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n",
" }\n",
" ];\n",
"\n",
" function run_inline_js() {\n",
" \n",
" if ((window.Bokeh !== undefined) || (force === \"1\")) {\n",
" for (var i = 0; i < inline_js.length; i++) {\n",
" inline_js[i](window.Bokeh);\n",
" }if (force === \"1\") {\n",
" display_loaded();\n",
" }} else if (Date.now() < window._bokeh_timeout) {\n",
" setTimeout(run_inline_js, 100);\n",
" } else if (!window._bokeh_failed_load) {\n",
" console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
" window._bokeh_failed_load = true;\n",
" } else if (!force) {\n",
" var cell = $(\"#c6289e64-189d-4236-a670-421feb29b434\").parents('.cell').data().cell;\n",
" cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
" }\n",
"\n",
" }\n",
"\n",
" if (window._bokeh_is_loading === 0) {\n",
" console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
" run_inline_js();\n",
" } else {\n",
" load_libs(js_urls, function() {\n",
" console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n",
" run_inline_js();\n",
" });\n",
" }\n",
"}(this));"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Custom libraries\n",
"from datascienceutils import plotter\n",
"from datascienceutils import analyze\n",
"\n",
"# Standard libraries\n",
"import json\n",
"%matplotlib inline\n",
"import datetime\n",
"import numpy as np\n",
"import pandas as pd\n",
"import random\n",
"\n",
"\n",
"from bokeh.plotting import figure, show, output_file, output_notebook, ColumnDataSource\n",
"from bokeh.charts import Histogram\n",
"import bokeh\n",
"output_notebook()\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2017-11-17T21:57:48.071794",
"start_time": "2017-11-17T21:57:48.022371"
},
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "File b'/home/anand/DataScientist/data/metadata.csv' does not exist",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mirisDf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'/home/anand/DataScientist/data/metadata.csv'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)\u001b[0m\n\u001b[0;32m 653\u001b[0m skip_blank_lines=skip_blank_lines)\n\u001b[0;32m 654\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 655\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 656\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 657\u001b[0m \u001b[0mparser_f\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m 403\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 404\u001b[0m \u001b[1;31m# Create the parser.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 405\u001b[1;33m \u001b[0mparser\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 406\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 407\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m 762\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'has_index_names'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'has_index_names'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 763\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 764\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 765\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 766\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[1;34m(self, engine)\u001b[0m\n\u001b[0;32m 983\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_make_engine\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mengine\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'c'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 984\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'c'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 985\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mCParserWrapper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 986\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 987\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mengine\u001b[0m \u001b[1;33m==\u001b[0m \u001b[1;34m'python'\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, src, **kwds)\u001b[0m\n\u001b[0;32m 1603\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'allow_leading_cols'\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex_col\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1604\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1605\u001b[1;33m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_reader\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mparsers\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTextReader\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msrc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1606\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1607\u001b[0m \u001b[1;31m# XXX\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.__cinit__ (pandas/_libs/parsers.c:4209)\u001b[1;34m()\u001b[0m\n",
"\u001b[1;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._setup_parser_source (pandas/_libs/parsers.c:8873)\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mFileNotFoundError\u001b[0m: File b'/home/anand/DataScientist/data/metadata.csv' does not exist"
]
}
],
"source": [
"irisDf = pd.read_csv('/home/anand/DataScientist/data/metadata.csv')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:42:13.107760",
"start_time": "2017-02-25T22:42:12.580525"
},
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/numpy/lib/function_base.py:4116: RuntimeWarning: Invalid value encountered in percentile\n",
" interpolation=interpolation)\n"
]
},
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" call_duration | \n",
" antenna_id | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 342.000000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" mean | \n",
" 228.997076 | \n",
" NaN | \n",
"
\n",
" \n",
" std | \n",
" 441.944354 | \n",
" NaN | \n",
"
\n",
" \n",
" min | \n",
" 1.000000 | \n",
" NaN | \n",
"
\n",
" \n",
" 25% | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 50% | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 75% | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" max | \n",
" 3072.000000 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" call_duration antenna_id\n",
"count 342.000000 0.0\n",
"mean 228.997076 NaN\n",
"std 441.944354 NaN\n",
"min 1.000000 NaN\n",
"25% NaN NaN\n",
"50% NaN NaN\n",
"75% NaN NaN\n",
"max 3072.000000 NaN"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"irisDf.describe()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:42:13.241292",
"start_time": "2017-02-25T22:42:13.110968"
},
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" interaction | \n",
" direction | \n",
" correspondent_id | \n",
" datetime | \n",
" call_duration | \n",
" antenna_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" call | \n",
" out | \n",
" +109535800433 | \n",
" 2016-10-16 07:34:52 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" call | \n",
" in | \n",
" +919486957832 | \n",
" 2016-10-16 12:49:40 | \n",
" 15.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" call | \n",
" in | \n",
" +914430811100 | \n",
" 2016-10-16 14:35:26 | \n",
" 101.0 | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
" call | \n",
" out | \n",
" +918903421807 | \n",
" 2016-10-16 14:37:47 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 4 | \n",
" call | \n",
" out | \n",
" +914634221807 | \n",
" 2016-10-16 14:38:30 | \n",
" 77.0 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" interaction direction correspondent_id datetime call_duration \\\n",
"0 call out +109535800433 2016-10-16 07:34:52 NaN \n",
"1 call in +919486957832 2016-10-16 12:49:40 15.0 \n",
"2 call in +914430811100 2016-10-16 14:35:26 101.0 \n",
"3 call out +918903421807 2016-10-16 14:37:47 NaN \n",
"4 call out +914634221807 2016-10-16 14:38:30 77.0 \n",
"\n",
" antenna_id \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"irisDf.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:42:13.427606",
"start_time": "2017-02-25T22:42:13.244186"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"call_duration 195314.812308\n",
"antenna_id NaN\n",
"dtype: float64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"irisDf.var()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:42:13.722517",
"start_time": "2017-02-25T22:42:13.430932"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"call_duration 3.96145\n",
"antenna_id NaN\n",
"dtype: float64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"irisDf.skew()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:42:13.925090",
"start_time": "2017-02-25T22:42:13.727277"
},
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" call_duration | \n",
" antenna_id | \n",
"
\n",
" \n",
" \n",
" \n",
" call_duration | \n",
" 1.0 | \n",
" NaN | \n",
"
\n",
" \n",
" antenna_id | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" call_duration antenna_id\n",
"call_duration 1.0 NaN\n",
"antenna_id NaN NaN"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"irisDf.corr()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:42:14.133476",
"start_time": "2017-02-25T22:42:13.926573"
},
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['call_duration', 'antenna_id'], dtype='object')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"irisDf.select_dtypes(include=[np.number]).columns"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:43:03.564120",
"start_time": "2017-02-25T22:43:03.482091"
},
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"# Correlation btw Numerical Columns\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"# Pandas correlation coefficients matrix\n",
" call_duration antenna_id\n",
"call_duration 1.0 NaN\n",
"antenna_id NaN NaN\n",
"# Pandas co-variance coefficients matrix\n",
" call_duration antenna_id\n",
"call_duration 195314.812308 NaN\n",
"antenna_id NaN NaN\n"
]
}
],
"source": [
"analyze.correlation_analyze(irisDf)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:43:17.392063",
"start_time": "2017-02-25T22:43:15.512536"
},
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variance of call_duration\n",
"195314.812308\n",
"Skewness of call_duration\n",
"3.96144995214\n",
"Variance of antenna_id\n",
"nan\n",
"Skewness of antenna_id\n",
"nan\n",
"Too many categorise for col: correspondent_id can't plot pie-chart\n",
"Too many categorise for col: datetime can't plot pie-chart\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/bokeh/core/compat/bokeh_renderer.py:262: UserWarning: Path marker shapes currently not handled, defaulting to Circle\n",
" warnings.warn(\"Path marker shapes currently not handled, defaulting to Circle\")\n",
"/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/matplotlib/artist.py:224: MatplotlibDeprecationWarning: get_axes has been deprecated in mpl 1.5, please use the\n",
"axes property. A removal date has not been set.\n",
" stacklevel=1)\n",
"/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/bokeh/core/compat/bokeh_renderer.py:295: UserWarning: Path marker sizes support is limited and may not display as expected\n",
" warnings.warn(\"Path marker sizes support is limited and may not display as expected\")\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"analyze.dist_analyze(irisDf)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:46:00.139444",
"start_time": "2017-02-25T22:45:59.910923"
},
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Variance of call_duration\n",
"195314.812308\n",
"Skewness of call_duration\n",
"3.96144995214\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/bokeh/core/compat/bokeh_renderer.py:262: UserWarning: Path marker shapes currently not handled, defaulting to Circle\n",
" warnings.warn(\"Path marker shapes currently not handled, defaulting to Circle\")\n",
"/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/matplotlib/artist.py:224: MatplotlibDeprecationWarning: get_axes has been deprecated in mpl 1.5, please use the\n",
"axes property. A removal date has not been set.\n",
" stacklevel=1)\n",
"/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/bokeh/core/compat/bokeh_renderer.py:295: UserWarning: Path marker sizes support is limited and may not display as expected\n",
" warnings.warn(\"Path marker sizes support is limited and may not display as expected\")\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"analyze.dist_analyze(irisDf, 'call_duration')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:46:00.674097",
"start_time": "2017-02-25T22:46:00.141103"
},
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1752 1752\n"
]
},
{
"ename": "ValueError",
"evalue": "Input contains NaN, infinity or a value too large for dtype('float64').",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0manalyze\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mregression_analyze\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mirisDf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'call_duration'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'datetime'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/datascienceutils-1.1.10-py3.5.egg/datascienceutils/analyze.py\u001b[0m in \u001b[0;36mregression_analyze\u001b[1;34m(df, col1, col2, trainsize, non_linear)\u001b[0m\n\u001b[0;32m 159\u001b[0m \u001b[0mtarget\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnew_df\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcol2\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 160\u001b[0m models = [\n\u001b[1;32m--> 161\u001b[1;33m \u001b[0mpm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_df\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumn\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcol1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodelType\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'LinearRegression'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 162\u001b[0m \u001b[0mpm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_df\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumn\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcol1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodelType\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'RidgeRegression'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 163\u001b[0m \u001b[0mpm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_df\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolumn\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcol1\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmodelType\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'RidgeRegressionCV'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/datascienceutils-1.1.10-py3.5.egg/datascienceutils/predictiveModels.py\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(dataframe, target, modelType, column, **kwargs)\u001b[0m\n\u001b[0;32m 30\u001b[0m \u001b[0msource\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdataframe\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mcolumn\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 32\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msource\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 33\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataframe\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtarget\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/linear_model/base.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[0;32m 510\u001b[0m \u001b[0mn_jobs_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 511\u001b[0m X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],\n\u001b[1;32m--> 512\u001b[1;33m y_numeric=True, multi_output=True)\n\u001b[0m\u001b[0;32m 513\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 514\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0msample_weight\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0matleast_1d\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msample_weight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_X_y\u001b[1;34m(X, y, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m 519\u001b[0m X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,\n\u001b[0;32m 520\u001b[0m \u001b[0mensure_2d\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mallow_nd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mensure_min_samples\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 521\u001b[1;33m ensure_min_features, warn_on_dtype, estimator)\n\u001b[0m\u001b[0;32m 522\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mmulti_output\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 523\u001b[0m y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,\n",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[1;34m(array, accept_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[0;32m 405\u001b[0m % (array.ndim, estimator_name))\n\u001b[0;32m 406\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mforce_all_finite\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 407\u001b[1;33m \u001b[0m_assert_all_finite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 408\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 409\u001b[0m \u001b[0mshape_repr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_shape_repr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m/home/anand/anaconda3/envs/analytics/lib/python3.5/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36m_assert_all_finite\u001b[1;34m(X)\u001b[0m\n\u001b[0;32m 56\u001b[0m and not np.isfinite(X).all()):\n\u001b[0;32m 57\u001b[0m raise ValueError(\"Input contains NaN, infinity\"\n\u001b[1;32m---> 58\u001b[1;33m \" or a value too large for %r.\" % X.dtype)\n\u001b[0m\u001b[0;32m 59\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 60\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mValueError\u001b[0m: Input contains NaN, infinity or a value too large for dtype('float64')."
]
}
],
"source": [
"analyze.regression_analyze(irisDf, 'call_duration', 'datetime')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"ExecuteTime": {
"end_time": "2017-02-25T22:46:00.675254",
"start_time": "2017-02-25T17:15:59.923Z"
},
"collapsed": false
},
"outputs": [],
"source": [
"irisDf.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"scrolled": false
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
},
"latex_envs": {
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 0
}
},
"nbformat": 4,
"nbformat_minor": 0
}