{ "cells": [
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Plotting and Visualization" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "\n", "PREVIOUS_MAX_ROWS = pd.options.display.max_rows\n", "pd.options.display.max_rows = 20\n", "np.random.seed(12345)\n", "plt.rc('figure', figsize=(10, 6))\n", "np.set_printoptions(precision=4, suppress=True)" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "%matplotlib notebook\n", "```" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## A Brief matplotlib API Primer" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import matplotlib.pyplot as plt" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import numpy as np\n", "data = np.arange(10)\n", "data\n", "plt.plot(data)" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Figures and Subplots" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "fig = plt.figure()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ax1 = fig.add_subplot(2, 2, 1)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ax2 = fig.add_subplot(2, 2, 2)\n", "ax3 = fig.add_subplot(2, 2, 3)" ] },
{ "cell_type":
"markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "fig = plt.figure()\n", "ax1 = fig.add_subplot(2, 2, 1)\n", "ax2 = fig.add_subplot(2, 2, 2)\n", "ax3 = fig.add_subplot(2, 2, 3)\n", "```" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.plot(np.random.randn(50).cumsum(), 'k--')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "_ = ax1.hist(np.random.randn(100), bins=20, color='k', alpha=0.3)\n", "ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.close('all')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "fig, axes = plt.subplots(2, 3)\n", "axes" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Adjusting the spacing around subplots" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "subplots_adjust(left=None, bottom=None, right=None, top=None,\n", "                wspace=None, hspace=None)\n", "```" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)\n", "for i in range(2):\n", "    for j in range(2):\n", "        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)\n", "plt.subplots_adjust(wspace=0, hspace=0)\n", "```" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)\n", "for i in range(2):\n", "    for j in range(2):\n", "        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)\n", "plt.subplots_adjust(wspace=0, hspace=0)" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Colors, Markers, and Line Styles" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "ax.plot(x, y, 'g--')\n", "```" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "ax.plot(x, y, linestyle='--', color='g')\n", "```" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from numpy.random import randn\n", "plt.plot(randn(30).cumsum(), 'ko--')" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "plot(randn(30).cumsum(), color='k', linestyle='dashed', marker='o')\n", "```" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.close('all')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "data = np.random.randn(30).cumsum()\n", "plt.plot(data, 'k--', label='Default')\n", "plt.plot(data, 'k-', drawstyle='steps-post', label='steps-post')\n", "plt.legend(loc='best')" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Ticks, Labels, and Legends" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Setting the title, axis labels, ticks, and ticklabels" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true },
"outputs": [], "source": [ "fig = plt.figure()\n", "ax = fig.add_subplot(1, 1, 1)\n", "ax.plot(np.random.randn(1000).cumsum())" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ticks = ax.set_xticks([0, 250, 500, 750, 1000])\n", "labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'],\n", "                            rotation=30, fontsize='small')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ax.set_title('My first matplotlib plot')\n", "ax.set_xlabel('Stages')" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "props = {\n", "    'title': 'My first matplotlib plot',\n", "    'xlabel': 'Stages'\n", "}\n", "ax.set(**props)\n", "```" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Adding legends" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from numpy.random import randn\n", "fig = plt.figure(); ax = fig.add_subplot(1, 1, 1)\n", "ax.plot(randn(1000).cumsum(), 'k', label='one')\n", "ax.plot(randn(1000).cumsum(), 'k--', label='two')\n", "ax.plot(randn(1000).cumsum(), 'k.', label='three')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ax.legend(loc='best')" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Annotations and Drawing on a Subplot" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "ax.text(x, y, 'Hello world!',\n", "        family='monospace', fontsize=10)\n", "```" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable":
true }, "outputs": [], "source": [ "from datetime import datetime\n", "\n", "fig = plt.figure()\n", "ax = fig.add_subplot(1, 1, 1)\n", "\n", "data = pd.read_csv('examples/spx.csv', index_col=0, parse_dates=True)\n", "spx = data['SPX']\n", "\n", "spx.plot(ax=ax, style='k-')\n", "\n", "crisis_data = [\n", "    (datetime(2007, 10, 11), 'Peak of bull market'),\n", "    (datetime(2008, 3, 12), 'Bear Stearns Fails'),\n", "    (datetime(2008, 9, 15), 'Lehman Bankruptcy')\n", "]\n", "\n", "for date, label in crisis_data:\n", "    ax.annotate(label, xy=(date, spx.asof(date) + 75),\n", "                xytext=(date, spx.asof(date) + 225),\n", "                arrowprops=dict(facecolor='black', headwidth=4, width=2,\n", "                                headlength=4),\n", "                horizontalalignment='left', verticalalignment='top')\n", "\n", "# Zoom in on 2007-2010\n", "ax.set_xlim(['1/1/2007', '1/1/2011'])\n", "ax.set_ylim([600, 1800])\n", "\n", "ax.set_title('Important dates in the 2008-2009 financial crisis')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ax.set_title('Important dates in the 2008–2009 financial crisis')" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "fig = plt.figure()\n", "ax = fig.add_subplot(1, 1, 1)\n", "\n", "rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)\n", "circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)\n", "pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],\n", "                   color='g', alpha=0.5)\n", "\n", "ax.add_patch(rect)\n", "ax.add_patch(circ)\n", "ax.add_patch(pgon)\n", "```" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "fig = plt.figure(figsize=(12, 6)); ax = fig.add_subplot(1, 1, 1)\n", "rect = plt.Rectangle((0.2, 0.75), 0.4, 0.15, color='k', alpha=0.3)\n", "circ = plt.Circle((0.7, 0.2), 0.15, color='b', alpha=0.3)\n", "pgon = plt.Polygon([[0.15, 0.15], [0.35, 0.4], [0.2, 0.6]],\n", "                   color='g', alpha=0.5)\n", "ax.add_patch(rect)\n", "ax.add_patch(circ)\n", "ax.add_patch(pgon)" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Saving Plots to File" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "plt.savefig('figpath.svg')\n", "```" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "plt.savefig('figpath.png', dpi=400, bbox_inches='tight')\n", "```" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "from io import BytesIO\n", "buffer = BytesIO()\n", "plt.savefig(buffer)\n", "plot_data = buffer.getvalue()\n", "```" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### matplotlib Configuration" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "plt.rc('figure', figsize=(10, 10))\n", "```" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "```python\n", "font_options = {'family' : 'monospace',\n", "                'weight' : 'bold',\n", "                'size' : 'small'}\n", "plt.rc('font', **font_options)\n", "```" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Plotting with pandas and seaborn" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Line Plots" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.close('all')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))\n", "s.plot()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed":
false, "deletable": true, "editable": true }, "outputs": [], "source": [ "df = pd.DataFrame(np.random.randn(10, 4).cumsum(axis=0),\n", "                  columns=['A', 'B', 'C', 'D'],\n", "                  index=np.arange(0, 100, 10))\n", "df.plot()" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Bar Plots" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "fig, axes = plt.subplots(2, 1)\n", "data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))\n", "data.plot.bar(ax=axes[0], color='k', alpha=0.7)\n", "data.plot.barh(ax=axes[1], color='k', alpha=0.7)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "np.random.seed(12348)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "df = pd.DataFrame(np.random.rand(6, 4),\n", "                  index=['one', 'two', 'three', 'four', 'five', 'six'],\n", "                  columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))\n", "df\n", "df.plot.bar()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "df.plot.barh(stacked=True, alpha=0.5)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.close('all')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tips = pd.read_csv('examples/tips.csv')\n", "party_counts = pd.crosstab(tips['day'], tips['size'])\n", "party_counts\n", "# Not many 1- and 6-person parties\n", "party_counts = party_counts.loc[:, 2:5]" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "# Normalize to sum to 1\n", "party_pcts = party_counts.div(party_counts.sum(axis=1), axis=0)\n", "party_pcts\n", "party_pcts.plot.bar()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.close('all')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import seaborn as sns\n", "tips['tip_pct'] = tips['tip'] / (tips['total_bill'] - tips['tip'])\n", "tips.head()\n", "sns.barplot(x='tip_pct', y='day', data=tips, orient='h')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.close('all')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sns.barplot(x='tip_pct', y='day', hue='time', data=tips, orient='h')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.close('all')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sns.set(style=\"whitegrid\")" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Histograms and Density Plots" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] },
{ "cell_type": "code",
"execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tips['tip_pct'].plot.hist(bins=50)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tips['tip_pct'].plot.density()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "comp1 = np.random.normal(0, 1, size=200)\n", "comp2 = np.random.normal(10, 2, size=200)\n", "values = pd.Series(np.concatenate([comp1, comp2]))\n", "sns.distplot(values, bins=100, color='k')" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Scatter or Point Plots" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "macro = pd.read_csv('examples/macrodata.csv')\n", "data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]\n", "trans_data = np.log(data).diff().dropna()\n", "trans_data[-5:]" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sns.regplot(x='m1', y='unemp', data=trans_data)\n", "plt.title('Changes in log %s versus log %s' % ('m1', 'unemp'))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed":
false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sns.pairplot(trans_data, diag_kind='kde', plot_kws={'alpha': 0.2})" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Facet Grids and Categorical Data" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sns.catplot(x='day', y='tip_pct', hue='time', col='smoker',\n", "            kind='bar', data=tips[tips.tip_pct < 1])" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sns.catplot(x='day', y='tip_pct', row='time',\n", "            col='smoker',\n", "            kind='bar', data=tips[tips.tip_pct < 1])" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "sns.catplot(x='tip_pct', y='day', kind='box',\n", "            data=tips[tips.tip_pct < 0.5])" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Other Python Visualization Tools" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.options.display.max_rows = PREVIOUS_MAX_ROWS" ] },
{ "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Conclusion" ] }
],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } },
"nbformat": 4,
"nbformat_minor": 0 }