{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Kernel Density Estimation. Plotly plot of the joint pdf and marginal pdf estimations" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read data from an Excel file:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Index([u'multiannual', u'bachelor-th'], dtype='object')" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "xl = pd.ExcelFile(\"CSCEng.xls\")\n", "dfc = xl.parse(\"Sheet1\")\n", "dfc.columns" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
multiannualbachelor-th
08.017.95
18.638.63
27.038.37
38.538.05
48.419.53
\n", "
" ], "text/plain": [ " multiannual bachelor-th\n", "0 8.01 7.95\n", "1 8.63 8.63\n", "2 7.03 8.37\n", "3 8.53 8.05\n", "4 8.41 9.53" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfc.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We estimate the joint pdf of the two columns `dfc['multiannual']`, `dfc['bachelor-th']`, using a gaussian kernel: " ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import scipy.stats as st\n", "\n", "def kde_scipy(vals1, vals2, x_range, y_range, N):\n", "    \"\"\"Estimate the joint pdf of two variables with a gaussian kernel on an N x N grid.\n", "\n", "    vals1, vals2 -- samples of the two variables (sequences of equal length)\n", "    x_range      -- (a, b), end points of the returned x grid; usually chosen\n", "                    wider than the range of the samples\n", "    y_range      -- (c, d), end points of the returned y grid\n", "    N            -- number of grid points along each axis\n", "\n", "    Returns [x, y, Z], to be passed to Plotly for plotting the contour of the\n", "    joint pdf. Z has shape (N, N) and Z[i][j] is the estimated density at\n", "    vals1 = y[i], vals2 = x[j]: vals1 runs along the y axis and vals2 along\n", "    the x axis, so pass the variable of the vertical axis first.\n", "    \"\"\"\n", "    # The intervals are plain arguments unpacked here: tuple parameters in the\n", "    # signature, def f((a, b)), are a syntax error in Python 3 (PEP 3113).\n", "    a, b = x_range\n", "    c, d = y_range\n", "    x = np.linspace(a, b, N)\n", "    y = np.linspace(c, d, N)\n", "    X, Y = np.meshgrid(x, y)  # X[i][j] = x[j], Y[i][j] = y[i]\n", "    # Y is stacked first so that it pairs with vals1, the first row of `values`\n", "    positions = np.vstack([Y.ravel(), X.ravel()])  # ravel() concatenates the rows\n", "\n", "    values = np.vstack([vals1, vals2])\n", "    kernel = st.gaussian_kde(values)\n", "    Z = np.reshape(kernel(positions).T, X.shape)\n", "\n", "    return [x, y, Z]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "a,b=(5,11) # joint pdf is evaluated at the N xN grid points of the square [a,b] x[a,b]\n", "N=200" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "x=list(dfc['multiannual'])\n", "y=list(dfc['bachelor-th'])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pdfx= st.gaussian_kde(x) #estimation of the pdfx from x-values\n", "pdfy=st.gaussian_kde(y) \n", "X=np.linspace(a, b, 100)\n", "Y=pdfx(X)#evaluate the pdfx at X\n", "yy=np.linspace(a, b, 100)\n", "xx=pdfy(yy)# the pdfy is a function of y-variable" ] }, { "cell_type": "code", "execution_count": 8, 
"metadata": { "collapsed": false }, "outputs": [], "source": [ "Xvals, Yvals, Zvals = kde_scipy( dfc['bachelor-th'],dfc['multiannual'], (a,b), (a,b), N )\n", " #attn: here we reversed the columns order " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define Data and Layout for Plotly plot:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import plotly.plotly as py\n", "from plotly.graph_objs import * " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set the text to be displayed when hovering the mouse over the contour plot of the joint pdf:" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# hover_xy[i][j] labels the grid point (Xvals[j], Yvals[i]), the same row/column layout as Zvals\n", "hover_xy=[\n", "['f({:0.2f}, {:0.2f})= {:0.2f}'.format(Xvals[j], Yvals[i], Zvals[i][j])\n", " for j in range(len(Xvals))] for i in range(len(Yvals)) ]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'f(6.72, 6.87)= 0.05'" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hover_xy[62][57]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Plotly version of the `matplotlib` `cmocean.salinity` colormap:" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pl_salinity=[[0.0, 'rgb(41,24,107)'],\n", " [0.05, 'rgb(45,27,137)'],\n", " [0.1, 'rgb(40,39,162)'],\n", " [0.15, 'rgb(24,61,158)'],\n", " [0.2, 'rgb(12,77,150)'],\n", " [0.25, 'rgb(15,91,144)'],\n", " [0.3, 'rgb(24,102,140)'],\n", " [0.35, 'rgb(35,113,138)'],\n", " [0.4, 'rgb(44,124,136)'],\n", " [0.45, 'rgb(52,135,136)'],\n", " [0.5, 'rgb(59,147,135)'],\n", " [0.55, 'rgb(66,158,132)'],\n", " [0.6, 'rgb(74,169,128)'],\n", " [0.65, 'rgb(85,181,122)'],\n", " [0.7, 'rgb(100,193,113)'],\n", " [0.75, 'rgb(122,203,102)'],\n", " [0.8, 
'rgb(148,211,93)'],\n", " [0.85, 'rgb(179,217,94)'],\n", " [0.9, 'rgb(208,224,109)'],\n", " [0.95, 'rgb(232,231,131)'],\n", " [1.0, 'rgb(253,238,153)']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define a Contour object:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [], "source": [ "trace1= Contour(\n", " z=Zvals, \n", " x=Xvals,\n", " y=Yvals,\n", " colorscale=pl_salinity,\n", " showscale=False,\n", " text=hover_xy,\n", " hoverinfo='text',\n", " contours=Contours(\n", " showlines=False), \n", " ) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set hover text for the two marginal pdfs:" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "textx=['(x,g(x))=('+'{:0.2f}'.format(X[i])+', '+'{:0.2f}'.format(Y[i])+')' for i in range(len(X))]\n", "texty=['(y,h(y))=('+'{:0.2f}'.format(yy[i])+', '+'{:0.2f}'.format(xx[i])+')' for i in range(len(yy))]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [], "source": [ "trace2 = Scatter(# Scatter object for the marginal pdf g(x)\n", " x=X, \n", " y=Y,\n", " name='pdf-x',\n", " mode='lines',\n", " fill='tozeroy',\n", " fillcolor='rgb(122,203,102)', \n", " line=Line(width=2, color='rgb(66,158,132)', shape='spline'), \n", " xaxis='x1',\n", " yaxis='y2',\n", " text=textx,\n", " hoverinfo='text',\n", " \n", ")\n", "trace3 = Scatter(# Scatter object for the marginal pdf h(y)\n", " x=xx, \n", " y=yy,\n", " name='pdf-y',\n", " mode='lines',\n", " fill='tozerox',\n", " fillcolor='rgb(122,203,102)',\n", " line=Line(width=2, color='rgb(66,158,132)', shape='spline'), \n", " text=texty,\n", " hoverinfo='text', \n", " xaxis='x2',\n", " yaxis='y1'\n", ")" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data = Data([trace1, trace2, trace3])" ] }, { "cell_type": "markdown", "metadata": 
{}, "source": [ "Set the plot layout:" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [], "source": [ "layout=Layout(title='Kernel Density Estimation',\n", " autosize=False,\n", " font=Font(size=11),\n", " height=550,\n", " showlegend=False,\n", " width=650,\n", " xaxis=XAxis(\n", " showgrid=False,\n", " domain=[0, 0.8],\n", " range=[a, b],\n", " title='x',\n", " titlefont=Font(size=11),\n", " zeroline=False, \n", " tickvals=[6,7,8,9,10, 11]\n", " ),\n", " xaxis2=XAxis(\n", " domain=[0.82, 1],\n", " showgrid=False,\n", " zeroline=False,\n", " side='top',\n", " ticklen=4,\n", " ),\n", " yaxis=YAxis(\n", " domain=[0, 0.8],\n", " range=[a, b],\n", " showgrid=False,\n", " title='y',\n", " zeroline=False,\n", " titlefont=Font(size=11),\n", " ),\n", " yaxis2=YAxis(\n", " domain=[0.82, 1],\n", " showgrid=False,\n", " zeroline=False,\n", " ticklen=4,\n", " \n", " \n", " ), \n", " margin=Margin(t=50),\n", " hovermode='closest',\n", ")\n", "fig = Figure(data=data, layout=layout)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import plotly\n", "plotly.offline.init_notebook_mode() " ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.core.display import HTML\n", "def css_styling():\n", "    \"\"\"Return the contents of ./custom.css wrapped in an HTML display object.\"\"\"\n", "    # the with-block closes the file handle even if read() raises\n", "    with open(\"./custom.css\", \"r\") as css_file:\n", "        styles = css_file.read()\n", "    return HTML(styles)\n", "css_styling()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 0 }