{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Gapminder data choropleths" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import plotly.plotly as py\n", "import plotly.tools as tls\n", "import pandas as pd\n", "import json\n", "import urllib2\n", "import re" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The most famous gapminder data is featured in [Hans Rosling](https://en.wikipedia.org/wiki/Hans_Rosling)'s bubble charts (see a plotly version [here](https://plot.ly/~etpinard/191/fig-31b-hans-roslings-bubble-chart-for-the-year-2007/)).\n", "\n", "In this notebook, we'll explore the same dataset using plotly choropleths." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countryyearpopcontinentlifeExpgdpPercap
0 Afghanistan 1952 8425333 Asia 28.801 779.445314
1 Afghanistan 1957 9240934 Asia 30.332 820.853030
2 Afghanistan 1962 10267083 Asia 31.997 853.100710
3 Afghanistan 1967 11537966 Asia 34.020 836.197138
4 Afghanistan 1972 13079460 Asia 36.088 739.981106
\n", "

5 rows × 6 columns

\n", "
" ], "text/plain": [ " country year pop continent lifeExp gdpPercap\n", "0 Afghanistan 1952 8425333 Asia 28.801 779.445314\n", "1 Afghanistan 1957 9240934 Asia 30.332 820.853030\n", "2 Afghanistan 1962 10267083 Asia 31.997 853.100710\n", "3 Afghanistan 1967 11537966 Asia 34.020 836.197138\n", "4 Afghanistan 1972 13079460 Asia 36.088 739.981106\n", "\n", "[5 rows x 6 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# big thanks to Jennifer Bryan!\n", "df_full = pd.read_csv('http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt', sep='\\t')\n", "\n", "df_full.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Select a year and truncate the dataframe" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Choose a year, find other years with df_full['year'].unique()\n", "the_year = 2007 \n", "\n", "# Find indices corresponding to 'the_year'\n", "i_year = (df_full['year'] == the_year)\n", "\n", "# Grab all rows corresponding to 'the_year'\n", "df = df_full[i_year] " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Define a plot function" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def plot(z, title, units):\n", " if units == '$':\n", " colorbar= dict(\n", " tickprefix=' ' + units,\n", " showtickprefix='last' \n", " )\n", " else:\n", " colorbar = dict(\n", " ticksuffix=' ' + units,\n", " showticksuffix='last'\n", " )\n", " \n", " url = py.plot(\n", " dict(\n", " data=[\n", " dict(\n", " type='choropleth',\n", " locationmode='country names',\n", " locations=df['country'],\n", " z=z,\n", " colorbar=colorbar\n", " )\n", " ],\n", " layout=dict(\n", " title=title + ' by country in ' + str(the_year),\n", " titlefont=dict(\n", " size=22\n", " ),\n", " geo=dict(\n", " projection=dict(\n", " type='kavrayskiy7'\n", " )\n", " ),\n", " 
width=1000,\n", " height=600\n", " )\n", " ),\n", " validate=False,\n", " filename=title.lower().replace(' ', '-'),\n", " auto_open=False\n", " )\n", " print(url)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use the plot function for each of the three dependent variables" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://plot.ly/~etpinard/4250\n", "https://plot.ly/~etpinard/4252\n", "https://plot.ly/~etpinard/4254\n" ] } ], "source": [ "plot(df['pop'], 'World population', 'million')\n", "plot(df['lifeExp'], 'Life expectancy', 'year')\n", "plot(df['gdpPercap'], 'GDP per capita', '$')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tls.embed('https://plot.ly/~etpinard/4250')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We note that Russia is not part of the dataset." 
] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tls.embed('https://plot.ly/~etpinard/4252')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tls.embed('https://plot.ly/~etpinard/4254')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Inject CSS styling in the NB\n", "from IPython.display import display, HTML\n", "display(HTML(open('../_custom.css').read()))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }