{
 "metadata": {
  "name": "",
  "signature": ""
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Box Plots"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The following illustrates some options for the boxplot in statsmodels. These include `violinplot` and `beanplot`."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "import matplotlib.pyplot as plt\n",
      "import statsmodels.api as sm"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Bean Plots"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The following example is taken from the docstring of `beanplot`.\n",
      "\n",
      "We use the American National Election Survey 1996 dataset, which has Party\n",
      "Identification of respondents as independent variable and (among other\n",
      "data) age as dependent variable."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "data = sm.datasets.anes96.load_pandas()\n",
      "party_ID = np.arange(7)\n",
      "labels = [\"Strong Democrat\", \"Weak Democrat\", \"Independent-Democrat\",\n",
      "          \"Independent-Independent\", \"Independent-Republican\",\n",
      "          \"Weak Republican\", \"Strong Republican\"]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Group age by party ID, and create a bean plot with it:"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "plt.rcParams['figure.subplot.bottom'] = 0.23  # keep labels visible\n",
      "plt.rcParams['figure.figsize'] = (10.0, 8.0)  # make plot larger in notebook\n",
      "# `pid` rather than `id`: avoids shadowing the builtin (the loop variable\n",
      "# leaks into the notebook namespace on Python 2).\n",
      "age = [data.exog['age'][data.endog == pid] for pid in party_ID]\n",
      "fig = plt.figure()\n",
      "ax = fig.add_subplot(111)\n",
      "plot_opts = {'cutoff_val': 5, 'cutoff_type': 'abs',\n",
      "             'label_fontsize': 'small',\n",
      "             'label_rotation': 30}\n",
      "sm.graphics.beanplot(age, ax=ax, labels=labels,\n",
      "                     plot_opts=plot_opts)\n",
      "ax.set_xlabel(\"Party identification of respondent.\")\n",
      "ax.set_ylabel(\"Age\")\n",
      "#plt.show()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def beanplot(data, plot_opts=None, jitter=False):\n",
      "    \"\"\"Draw a bean plot of `data` on a new figure and return that figure.\n",
      "\n",
      "    Helper function to try out different plot options.\n",
      "\n",
      "    Parameters\n",
      "    ----------\n",
      "    data\n",
      "        Forwarded unchanged to `sm.graphics.beanplot`.\n",
      "    plot_opts : dict, optional\n",
      "        Options merged on top of the defaults defined below; the dict\n",
      "        passed in is not modified.\n",
      "    jitter : bool, optional\n",
      "        Forwarded to `sm.graphics.beanplot`.\n",
      "\n",
      "    Returns\n",
      "    -------\n",
      "    fig\n",
      "        The figure created by `plt.figure()` that holds the plot.\n",
      "\n",
      "    Notes\n",
      "    -----\n",
      "    Uses the notebook-level `labels` list for the x tick labels.\n",
      "    \"\"\"\n",
      "    fig = plt.figure()\n",
      "    ax = fig.add_subplot(111)\n",
      "    plot_opts_ = {'cutoff_val': 5, 'cutoff_type': 'abs',\n",
      "                  'label_fontsize': 'small',\n",
      "                  'label_rotation': 30}\n",
      "    if plot_opts:\n",
      "        plot_opts_.update(plot_opts)\n",
      "    sm.graphics.beanplot(data, ax=ax, labels=labels,\n",
      "                         jitter=jitter, plot_opts=plot_opts_)\n",
      "    ax.set_xlabel(\"Party identification of respondent.\")\n",
      "    ax.set_ylabel(\"Age\")\n",
      "    # Callers assign the result (`fig = beanplot(...)`), so hand the figure back.\n",
      "    return fig"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "fig = beanplot(age, jitter=True)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "fig = beanplot(age, plot_opts={'violin_width': 0.5, 'violin_fc':'#66c2a5'})"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "fig = beanplot(age, plot_opts={'violin_fc':'#66c2a5'})"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "fig = beanplot(age, plot_opts={'bean_size': 0.2, 'violin_width': 0.75, 'violin_fc':'#66c2a5'})"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "fig = beanplot(age, jitter=True, plot_opts={'violin_fc':'#66c2a5'})"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "fig = beanplot(age, jitter=True, plot_opts={'violin_width': 0.5, 'violin_fc':'#66c2a5'})"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Advanced Box Plots"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Based on the example script `example_enhanced_boxplots.py` (by Ralf Gommers)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import print_function\n",
      "import numpy as np\n",
      "import matplotlib.pyplot as plt\n",
      "\n",
      "import statsmodels.api as sm\n",
      "\n",
      "\n",
      "# Necessary to make horizontal axis labels fit\n",
      "plt.rcParams['figure.subplot.bottom'] = 0.23\n",
      "\n",
      "data = sm.datasets.anes96.load_pandas()\n",
      "party_ID = np.arange(7)\n",
      "labels = [\"Strong Democrat\", \"Weak Democrat\", \"Independent-Democrat\",\n",
      "          \"Independent-Independent\", \"Independent-Republican\",\n",
      "          \"Weak Republican\", \"Strong Republican\"]\n",
      "\n",
      "# Group age by party ID.\n",
      "age = [data.exog['age'][data.endog == pid] for pid in party_ID]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Create a violin plot.\n",
      "fig = plt.figure()\n",
      "ax = fig.add_subplot(111)\n",
      "\n",
      "sm.graphics.violinplot(age, ax=ax, labels=labels,\n",
      "                       plot_opts={'cutoff_val': 5, 'cutoff_type': 'abs',\n",
      "                                  'label_fontsize': 'small',\n",
      "                                  'label_rotation': 30})\n",
      "\n",
      "ax.set_xlabel(\"Party identification of respondent.\")\n",
      "ax.set_ylabel(\"Age\")\n",
      "ax.set_title(\"US national election '96 - Age & Party Identification\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Create a bean plot.\n",
      "fig2 = plt.figure()\n",
      "ax = fig2.add_subplot(111)\n",
      "\n",
      "sm.graphics.beanplot(age, ax=ax, labels=labels,\n",
      "                     plot_opts={'cutoff_val': 5, 'cutoff_type': 'abs',\n",
      "                                'label_fontsize': 'small',\n",
      "                                'label_rotation': 30})\n",
      "\n",
      "ax.set_xlabel(\"Party identification of respondent.\")\n",
      "ax.set_ylabel(\"Age\")\n",
      "ax.set_title(\"US national election '96 - Age & Party Identification\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Create a jitter plot.\n",
      "fig3 = plt.figure()\n",
      "ax = fig3.add_subplot(111)\n",
      "\n",
      "plot_opts = {'cutoff_val': 5, 'cutoff_type': 'abs', 'label_fontsize': 'small',\n",
      "             'label_rotation': 30, 'violin_fc': (0.8, 0.8, 0.8),\n",
      "             'jitter_marker': '.', 'jitter_marker_size': 3, 'bean_color': '#FF6F00',\n",
      "             'bean_mean_color': '#009D91'}\n",
      "sm.graphics.beanplot(age, ax=ax, labels=labels, jitter=True,\n",
      "                     plot_opts=plot_opts)\n",
      "\n",
      "ax.set_xlabel(\"Party identification of respondent.\")\n",
      "ax.set_ylabel(\"Age\")\n",
      "ax.set_title(\"US national election '96 - Age & Party Identification\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Create an asymmetrical jitter plot.\n",
      "# Distinct names are used for the income subsets so the grouped `age` list\n",
      "# from the cells above is not overwritten.\n",
      "is_lower = data.exog['income'] < 16  # incomes < $30k\n",
      "age_lower = data.exog['age'][is_lower]\n",
      "party_lower = data.endog[is_lower]\n",
      "age_lower_income = [age_lower[party_lower == pid] for pid in party_ID]\n",
      "\n",
      "is_higher = data.exog['income'] >= 20  # incomes > $50k\n",
      "age_higher = data.exog['age'][is_higher]\n",
      "party_higher = data.endog[is_higher]\n",
      "age_higher_income = [age_higher[party_higher == pid] for pid in party_ID]\n",
      "\n",
      "fig = plt.figure()\n",
      "ax = fig.add_subplot(111)\n",
      "\n",
      "# Build each side's options from a copy of the jitter-plot `plot_opts`, so\n",
      "# that dict is left untouched and re-running this cell gives the same plot.\n",
      "# The legend texts are raw strings: '\\$' is not a valid escape sequence in a\n",
      "# normal string literal, while the backslash itself must reach matplotlib.\n",
      "lower_opts = dict(plot_opts)\n",
      "lower_opts.update({'violin_fc': (0.5, 0.5, 0.5),\n",
      "                   'bean_show_mean': False,\n",
      "                   'bean_show_median': False,\n",
      "                   'bean_legend_text': r'Income < \\$30k',\n",
      "                   'cutoff_val': 10})\n",
      "sm.graphics.beanplot(age_lower_income, ax=ax, labels=labels, side='left',\n",
      "                     jitter=True, plot_opts=lower_opts)\n",
      "\n",
      "# The right-hand side inherits the left-hand options and overrides three.\n",
      "higher_opts = dict(lower_opts)\n",
      "higher_opts.update({'violin_fc': (0.7, 0.7, 0.7),\n",
      "                    'bean_color': '#009D91',\n",
      "                    'bean_legend_text': r'Income > \\$50k'})\n",
      "sm.graphics.beanplot(age_higher_income, ax=ax, labels=labels, side='right',\n",
      "                     jitter=True, plot_opts=higher_opts)\n",
      "\n",
      "ax.set_xlabel(\"Party identification of respondent.\")\n",
      "ax.set_ylabel(\"Age\")\n",
      "ax.set_title(\"US national election '96 - Age & Party Identification\")\n",
      "\n",
      "\n",
      "# Show all plots.\n",
      "#plt.show()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}