{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2D Kernel Density Distributions Using Plotly"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### ABOUT THE AUTHOR:\n",
    "This notebook was contributed by [Plotly user Emilia Petrisor](https://plotly.com/~empet).  You can follow Emilia on Twitter [@mathinpython](https://twitter.com/mathinpython) or [GitHub](https://github.com/empet)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Introduction:"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We have two `Excel` files with two columns. We read the files into two `pandas` dataframes and plot\n",
    "for each of them an estimate of the joint distribution of the corresponding two columns. The joint distribution is calcalutated by `scipy.stats.gaussian_kde` [function](http://docs.scipy.org/doc/scipy-0.15.1/reference/generated/scipy.stats.gaussian_kde.html).     "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import numpy as np\n",
    "import scipy.stats as st\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read the first file:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'multiannual', u'bachelor-th'], dtype='object')"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xl = pd.ExcelFile(\"Data/CSCEng.xls\")\n",
    "dfc = xl.parse(\"Sheet1\")\n",
    "dfc.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "and the seconed one:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'multiannual', u'bachelor-th'], dtype='object')"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xl = pd.ExcelFile(\"Data/SystEng.xls\")\n",
    "dfi = xl.parse(\"Sheet1\")\n",
    "dfi.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The contour plot of the joint distribution of two variables (columns) is colored with a custom  colorscale: "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "cubehelix_cs=[[0.0, '#fcf9f7'],\n",
    " [0.16666666666666666, '#edcfc9'],\n",
    " [0.3333333333333333, '#daa2ac'],\n",
    " [0.5, '#bc7897'],\n",
    " [0.6666666666666666, '#925684'],\n",
    " [0.8333333333333333, '#5f3868'],\n",
    " [1.0, '#2d1e3e']]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The function `kde_scipy` returns data for Plotly contour plot of the estimated 2D distribution:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def kde_scipy( vals1, vals2, (a,b), (c,d), N ):\n",
    "    \n",
    "    #vals1, vals2 are the values of two variables (columns)\n",
    "    #(a,b) interval for vals1; usually larger than (np.min(vals1), np.max(vals1))\n",
    "    #(c,d) -\"-          vals2 \n",
    "    \n",
    "    x=np.linspace(a,b,N)\n",
    "    y=np.linspace(c,d,N)\n",
    "    X,Y=np.meshgrid(x,y)\n",
    "    positions = np.vstack([Y.ravel(), X.ravel()])\n",
    "\n",
    "    values = np.vstack([vals1, vals2])\n",
    "    kernel = st.gaussian_kde(values)\n",
    "    Z = np.reshape(kernel(positions).T, X.shape)\n",
    "    \n",
    "    return [x, y, Z]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Contour plot of the joint distribution of data from the first file ###"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import plotly.plotly as py\n",
    "from plotly.graph_objs import *   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def make_kdeplot(varX, varY, (a,b), (c,d), N, colorsc, title):\n",
    "    #varX, varY are lists, 1d numpy.array(s), or dataframe columns, storing the values of two variables\n",
    "   \n",
    "    x, y, Z = kde_scipy(varY, varX, (a,b), (c,d), N )\n",
    "    \n",
    "    data = Data([\n",
    "       Contour(\n",
    "           z=Z, \n",
    "           x=x,\n",
    "           y=y,\n",
    "           colorscale=colorsc,\n",
    "           #reversescale=True,\n",
    "           opacity=0.9,    \n",
    "           contours=Contours(\n",
    "               showlines=False)      \n",
    "        ),        \n",
    "     ])\n",
    "\n",
    "    layout = Layout(\n",
    "        title= title,  \n",
    "        font= Font(family='Georgia, serif',  color='#635F5D'),\n",
    "        showlegend=False,\n",
    "        autosize=False,\n",
    "        width=650,\n",
    "        height=650,\n",
    "        xaxis=XAxis(\n",
    "            range=[a,b],\n",
    "            showgrid=False,\n",
    "            nticks=7\n",
    "        ),\n",
    "        yaxis=YAxis(\n",
    "            range=[c,d],\n",
    "            showgrid=False,\n",
    "            nticks=7\n",
    "        ),\n",
    "        margin=Margin(\n",
    "            l=40,\n",
    "            r=40,\n",
    "            b=85,\n",
    "            t=100,\n",
    "        ),\n",
    "    )\n",
    "     \n",
    "    return Figure( data=data, layout=layout )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plotly.com/~empet/2802.embed\" height=\"650px\" width=\"650px\"></iframe>"
      ],
      "text/plain": [
       "<plotly.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "N=200\n",
    "a,b=(5,11)\n",
    "fig=make_kdeplot(dfc['multiannual'], dfc['bachelor-th'], (a,b), (a,b), \n",
    "                 N, cubehelix_cs,'kde plot of two sets of data' )\n",
    "\n",
    "py.sign_in('empet', 'my_api_key')\n",
    "py.iplot(fig, filename='kde-2D-CSCE')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### Contour plot of the joint distribution of data from the second file ###"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plotly.com/~empet/2809.embed\" height=\"650px\" width=\"650px\"></iframe>"
      ],
      "text/plain": [
       "<plotly.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a, b=(4,12)\n",
    "fig=make_kdeplot(dfi['multiannual'], dfi['bachelor-th'], (a,b), (a,b),\n",
    "                 N, cubehelix_cs, 'kde plot of two sets of data')\n",
    "py.iplot(fig, filename='kde-2D-SE')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "One notices that the second contourplot illustrates   a [mixture of two bivariate\n",
    "distributions](https://en.wikipedia.org/wiki/Mixture_distribution)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Finally we read a dataframe from a csv  file posted on the Plotly's github account, select  the rows corresponding to `Iris-virginica`, and plot the joint distribution of two virginica features:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plotly.com/~empet/2843.embed\" height=\"650px\" width=\"650px\"></iframe>"
      ],
      "text/plain": [
       "<plotly.tools.PlotlyDisplay object>"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/iris.csv')\n",
    "virginica = df.loc[df.Name == \"Iris-virginica\"]\n",
    "a, b=(5,8.5)\n",
    "c,d=(2,4)\n",
    "N=100\n",
    "fig=make_kdeplot(virginica.SepalLength, virginica.SepalWidth, (a,b), (c,d),\n",
    "    N, cubehelix_cs, 'kde plot of joint distribution for virginica SepalLength and SepalWidth')\n",
    "py.iplot(fig,  filename='virginica-sepal-length-vs-width')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       "    /*body {\n",
       "        background-color: #F5F5F5;\n",
       "    }*/\n",
       "    div.cell{\n",
       "        width: 900px;\n",
       "        margin-left: 13% !important;\n",
       "        margin-right: auto;\n",
       "    }\n",
       "    #notebook li { /* More space between bullet points */\n",
       "    margin-top:0.8em;\n",
       "    }\n",
       "\n",
       "    h1 {\n",
       "        font-family: 'Alegreya Sans', sans-serif;\n",
       "    }\n",
       "    .text_cell_render h1 {\n",
       "        font-weight: 200;\n",
       "        font-size: 40pt;\n",
       "        line-height: 100%;\n",
       "        color: rgb(8, 66, 133);\n",
       "        margin-bottom: 0em;\n",
       "        margin-top: 0em;\n",
       "        display: block;\n",
       "    }\n",
       "    h2 {\n",
       "        font-family: 'Fenix', serif;\n",
       "        text-indent:1em;\n",
       "        text-align:center;\n",
       "    }\n",
       "    .text_cell_render h2 {\n",
       "        font-weight: 200;\n",
       "        font-size: 28pt;\n",
       "        line-height: 100%;\n",
       "        color: rgb(8, 66, 133);\n",
       "        margin-bottom: 1.5em;\n",
       "        margin-top: 0.5em;\n",
       "        display: block;\n",
       "    }\n",
       "    h3 {\n",
       "        font-family: 'Fenix', serif;\n",
       "        %margin-top:12px;\n",
       "        %margin-bottom: 3px;\n",
       "    }\n",
       "    .text_cell_render h3 {\n",
       "        font-weight: 300;\n",
       "        font-size: 18pt;\n",
       "        line-height: 100%;\n",
       "        color: rgb(8, 66, 133);\n",
       "        margin-bottom: 0.5em;\n",
       "        margin-top: 2em;\n",
       "        display: block;\n",
       "    }\n",
       "    h4 {\n",
       "        font-family: 'Fenix', serif;\n",
       "    }\n",
       "    .text_cell_render h4 {\n",
       "        font-weight: 300;\n",
       "        font-size: 16pt;\n",
       "        color: rgb(8, 66, 133);\n",
       "        margin-bottom: 0.5em;\n",
       "        margin-top: 0.5em;\n",
       "        display: block;\n",
       "    }\n",
       "    h5 {\n",
       "        font-family: 'Alegreya Sans', sans-serif;\n",
       "    }\n",
       "    .text_cell_render h5 {\n",
       "        font-weight: 300;\n",
       "        font-style: normal;\n",
       "        font-size: 16pt;\n",
       "        margin-bottom: 0em;\n",
       "        margin-top: 1.5em;\n",
       "        display: block;\n",
       "        }\n",
       "    div.text_cell_render{\n",
       "        font-family: 'Alegreya Sans',Computer Modern, \"Helvetica Neue\", Arial, Helvetica, Geneva, sans-serif;\n",
       "        line-height: 145%;\n",
       "        font-size: 130%;\n",
       "        width:900px;\n",
       "        margin-left:auto;\n",
       "        margin-right:auto;\n",
       "        %text-align:justify;\n",
       "        %text-justify:inter-word;\n",
       "    }\n",
       "    \n",
       "    \n",
       "    code{\n",
       "      font-size: 78%;\n",
       "    }\n",
       "    .rendered_html code{\n",
       "        background-color: transparent;\n",
       "        white-space: inherit;   \n",
       "    }\n",
       "    .prompt{\n",
       "        display: None;\n",
       "     }\n",
       "    .rendered_html code{\n",
       "    background-color: transparent;\n",
       "    }\n",
       "\n",
       "    blockquote{\n",
       "      display:block;\n",
       "      background: #f3f3f3;\n",
       "      font-family: \"Open sans\",verdana,arial,sans-serif;\n",
       "      width:610px;\n",
       "      padding: 15px 15px 15px 15px;\n",
       "      text-align:justify;\n",
       "      text-justify:inter-word;\n",
       "      }\n",
       "      blockquote p {\n",
       "        margin-bottom: 0;\n",
       "        line-height: 125%;\n",
       "        font-size: 100%;\n",
       "      }\n",
       "   /* element.style {\n",
       "    } */\n",
       "</style>\n",
       "<script>\n",
       "    MathJax.Hub.Config({\n",
       "                        TeX: {\n",
       "                           extensions: [\"AMSmath.js\"]\n",
       "                           },\n",
       "                tex2jax: {\n",
       "                    inlineMath: [ [\"$\",\"$\"], [\"\\\\(\",\"\\\\)\"] ],\n",
       "                    displayMath: [ [\"$$\",\"$$\"], [\"\\\\[\",\"\\\\]\"] ]\n",
       "                },\n",
       "                displayAlign: \"center\", // Change this to \"center\" to center equations.\n",
       "                \"HTML-CSS\": {\n",
       "                    styles: {\".MathJax_Display\": {\"margin\": 4}}\n",
       "                }\n",
       "        });\n",
       "</script>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from IPython.core.display import HTML\n",
    "def  css_styling():\n",
    "    styles = open(\"./custom.css\", \"r\").read()\n",
    "    return HTML(styles)\n",
    "css_styling()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<link href=\"//fonts.googleapis.com/css?family=Open+Sans:600,400,300,200|Inconsolata|Ubuntu+Mono:400,700\" rel=\"stylesheet\" type=\"text/css\" />"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<link rel=\"stylesheet\" type=\"text/css\" href=\"https://help.plot.ly/documentation/all_static/css/ipython-notebook-custom.css\">"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "\n",
    "from IPython.display import HTML, display\n",
    "\n",
    "display(HTML('<link href=\"//fonts.googleapis.com/css?family=Open+Sans:600,400,300,200|Inconsolata|Ubuntu+Mono:400,700\" rel=\"stylesheet\" type=\"text/css\" />'))\n",
    "display(HTML('<link rel=\"stylesheet\" type=\"text/css\" href=\"https://help.plot.ly/documentation/all_static/css/ipython-notebook-custom.css\">'))\n",
    "\n",
    "import publisher\n",
    "publisher.publish('2d-kernel-density-distributions', '/ipython-notebooks/2d-kernel-density-distributions/', \n",
    "                  '2d Kernel Density Distributions', \n",
    "                  '2D Kernel Density Distributions Using Plotly')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}