{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Use GeoPandas and Bokeh to build a choropleth plot\n", "\n", "##### Germain Salvato Vallverdu [`germain.vallverdu@gmail.com`](mailto:germain.vallverdu@gmail.com)\n", "\n", "This notebook shows how to build a choropleth plot using [geopandas](http://geopandas.org/) \n", "in order to read a shapefile and [bokeh](http://bokeh.pydata.org/) in order to draw the \n", "choropleth plot.\n", "\n", "U.S. states and counties are available from core packages of bokeh. In the case of France you \n", "have to download the data from a government website. In this example, I used the data from the \n", "[IGN website](http://professionnels.ign.fr/geofla#tab-3). I downloaded only the departments of\n", "metropolitan France.\n", "\n", "The main strategy is the following:\n", "\n", "1. [Read data you want to put on the map with pandas](#1.-Read-data-from-csv)\n", "2. [Read the shapefile with geopandas](#2.-Read-GIS-data-in-a-shapefile)\n", "3. [Merge the data frames](#3.-Merge-the-data-frames)\n", "4. [Set up the plot](#4.-Set-up-the-plot)\n", "\n", "## Used packages\n", "\n", "Import the packages used:\n", "\n", "* pandas for data\n", "* geopandas for Geographic Information System (GIS) data\n", "* bokeh for the plot" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " Loading BokehJS ...\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(global) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = \"1\";\n", "\n", " if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force !== \"\") {\n", " window._bokeh_onload_callbacks = [];\n", " window._bokeh_is_loading = undefined;\n", " }\n", "\n", "\n", " \n", " if (typeof (window._bokeh_timeout) === \"undefined\" || force !== \"\") {\n", " window._bokeh_timeout = Date.now() + 5000;\n", " window._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"

\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"

\\n\"+\n", " \"\\n\"+\n", " \"\\n\"+\n", " \"from bokeh.resources import INLINE\\n\"+\n", " \"output_notebook(resources=INLINE)\\n\"+\n", " \"\\n\"+\n", " \"
\"}};\n", "\n", " function display_loaded() {\n", " if (window.Bokeh !== undefined) {\n", " Bokeh.$(\"#845e97b0-e48f-406e-843b-f2f404496c82\").text(\"BokehJS successfully loaded.\");\n", " } else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(display_loaded, 100)\n", " }\n", " }\n", "\n", " function run_callbacks() {\n", " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", " delete window._bokeh_onload_callbacks\n", " console.info(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(js_urls, callback) {\n", " window._bokeh_onload_callbacks.push(callback);\n", " if (window._bokeh_is_loading > 0) {\n", " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " window._bokeh_is_loading = js_urls.length;\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " var s = document.createElement('script');\n", " s.src = url;\n", " s.async = false;\n", " s.onreadystatechange = s.onload = function() {\n", " window._bokeh_is_loading--;\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", " run_callbacks()\n", " }\n", " };\n", " s.onerror = function() {\n", " console.warn(\"failed to load library \" + url);\n", " };\n", " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", " }\n", " };var element = document.getElementById(\"845e97b0-e48f-406e-843b-f2f404496c82\");\n", " if (element == null) {\n", " console.log(\"Bokeh: ERROR: autoload.js configured with elementid '845e97b0-e48f-406e-843b-f2f404496c82' but no matching script tag was found. 
\")\n", " return false;\n", " }\n", "\n", " var js_urls = ['https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.js', 'https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.js'];\n", "\n", " var inline_js = [\n", " function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", " \n", " function(Bokeh) {\n", " \n", " Bokeh.$(\"#845e97b0-e48f-406e-843b-f2f404496c82\").text(\"BokehJS is loading...\");\n", " },\n", " function(Bokeh) {\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.3.min.css\");\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.3.min.css\");\n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " \n", " if ((window.Bokeh !== undefined) || (force === \"1\")) {\n", " for (var i = 0; i < inline_js.length; i++) {\n", " inline_js[i](window.Bokeh);\n", " }if (force === \"1\") {\n", " display_loaded();\n", " }} else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!window._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " window._bokeh_failed_load = true;\n", " } else if (!force) {\n", " var cell = $(\"#845e97b0-e48f-406e-843b-f2f404496c82\").parents('.cell').data().cell;\n", " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", " }\n", "\n", " }\n", "\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(js_urls, function() {\n", " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(this));" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas 
as pd\n", "import geopandas as gpd\n", "\n", "from bokeh.io import show, output_notebook\n", "from bokeh.plotting import figure\n", "import bokeh.models as bm\n", "import bokeh.palettes\n", "\n", "# set up bokeh for jupyter notebook\n", "output_notebook()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Read data from csv\n", "\n", "First, read the data from a csv file. Here I did the following:\n", "\n", "1. Read the csv file\n", "2. Extract data for France only\n", "3. Set up an index with the department number as a string with 2 digits." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nombre de membresPaysDepartement
CODE_DEPT
018FR1.0
021FR2.0
031FR3.0
043FR4.0
051FR5.0
\n", "
" ], "text/plain": [ " Nombre de membres Pays Departement\n", "CODE_DEPT \n", "01 8 FR 1.0\n", "02 1 FR 2.0\n", "03 1 FR 3.0\n", "04 3 FR 4.0\n", "05 1 FR 5.0" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(\"membres_par_pays_dep.csv\", sep=\";\")\n", "df = df[df[\"Pays\"] == \"FR\"]\n", "df.rename(columns={df.columns[2]: \"Departement\"}, inplace=True)\n", "df.index = df.Departement.apply(lambda x: \"%02d\" % x)\n", "df.index.name = \"CODE_DEPT\"\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Read GIS data in a shapefile\n", "\n", "Now we read a shape file and set up the same index with the department number.\n", "\n", "The last column of a `GeoDataFrame` is a geometry object, here a polygon." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NOM_CHFNOM_DEPTgeometry
CODE_DEPT
01BOURG-EN-BRESSEAINPOLYGON ((838243.5999999591 6564210.300001801,...
02LAONAISNEPOLYGON ((708718.999999999 6956305.000000671, ...
03MOULINSALLIERPOLYGON ((664479.2000000098 6602292.300001685,...
04DIGNE-LES-BAINSALPES-DE-HAUTE-PROVENCEPOLYGON ((910437.2999999194 6342569.70000242, ...
05GAPHAUTES-ALPESPOLYGON ((933489.9999999163 6411083.800002239,...
\n", "
" ], "text/plain": [ " NOM_CHF NOM_DEPT \\\n", "CODE_DEPT \n", "01 BOURG-EN-BRESSE AIN \n", "02 LAON AISNE \n", "03 MOULINS ALLIER \n", "04 DIGNE-LES-BAINS ALPES-DE-HAUTE-PROVENCE \n", "05 GAP HAUTES-ALPES \n", "\n", " geometry \n", "CODE_DEPT \n", "01 POLYGON ((838243.5999999591 6564210.300001801,... \n", "02 POLYGON ((708718.999999999 6956305.000000671, ... \n", "03 POLYGON ((664479.2000000098 6602292.300001685,... \n", "04 POLYGON ((910437.2999999194 6342569.70000242, ... \n", "05 POLYGON ((933489.9999999163 6411083.800002239,... " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gdf = gpd.GeoDataFrame.from_file(\"DEPARTEMENT/DEPARTEMENT.shp\")\n", "gdf = gdf[[\"CODE_DEPT\", \"NOM_CHF\", \"NOM_DEPT\", \"geometry\"]]\n", "gdf.set_index(\"CODE_DEPT\", inplace=True)\n", "gdf.sort_index(inplace=True)\n", "gdf.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Merge the data frames\n", "\n", "Now we merge the data frames. As they have the same index, you only need to call the `join` method.\n", "\n", "We only keep the column with the number of members and set `NaN` values to zero." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NOM_CHFNOM_DEPTgeometrymembres
CODE_DEPT
01BOURG-EN-BRESSEAINPOLYGON ((838243.5999999591 6564210.300001801,...8.0
02LAONAISNEPOLYGON ((708718.999999999 6956305.000000671, ...1.0
03MOULINSALLIERPOLYGON ((664479.2000000098 6602292.300001685,...1.0
04DIGNE-LES-BAINSALPES-DE-HAUTE-PROVENCEPOLYGON ((910437.2999999194 6342569.70000242, ...3.0
05GAPHAUTES-ALPESPOLYGON ((933489.9999999163 6411083.800002239,...1.0
\n", "
" ], "text/plain": [ " NOM_CHF NOM_DEPT \\\n", "CODE_DEPT \n", "01 BOURG-EN-BRESSE AIN \n", "02 LAON AISNE \n", "03 MOULINS ALLIER \n", "04 DIGNE-LES-BAINS ALPES-DE-HAUTE-PROVENCE \n", "05 GAP HAUTES-ALPES \n", "\n", " geometry membres \n", "CODE_DEPT \n", "01 POLYGON ((838243.5999999591 6564210.300001801,... 8.0 \n", "02 POLYGON ((708718.999999999 6956305.000000671, ... 1.0 \n", "03 POLYGON ((664479.2000000098 6602292.300001685,... 1.0 \n", "04 POLYGON ((910437.2999999194 6342569.70000242, ... 3.0 \n", "05 POLYGON ((933489.9999999163 6411083.800002239,... 1.0 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gdf2 = gdf.join(df[\"Nombre de membres\"])\n", "gdf2.rename(columns={\"Nombre de membres\": \"membres\"}, inplace=True)\n", "gdf2.fillna(value=0., inplace=True)\n", "gdf2.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Set up the plot\n", "\n", "First we convert the `GeoDataFrame` into a `GeoJSONDataSource` bokeh model." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "geo_src = bm.GeoJSONDataSource(geojson=gdf2.to_json())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we set up and show the plot." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# set up a log colormap\n", "cmap = bm.LogColorMapper(\n", " palette=bokeh.palettes.BuGn9[::-1], # reverse the palette\n", " low=0, \n", " high=gdf2.membres.max()\n", ")\n", "\n", "# define web tools\n", "TOOLS = \"pan,wheel_zoom,box_zoom,reset,hover,save\"\n", "\n", "# set up bokeh figure\n", "p = figure(\n", " title=\"Membres de l'AFPY en 2016\", \n", " tools=TOOLS,\n", " toolbar_location=\"below\",\n", " x_axis_location=None, \n", " y_axis_location=None, \n", " width=500, \n", " height=500\n", ")\n", "\n", "# remove the grid\n", "p.grid.grid_line_color = None\n", "\n", "# core part !\n", "# * add a patch for each polygon in the geo data frame\n", "# * fill color from column 'membres' using the color map defined above\n", "p.patches(\n", " 'xs', 'ys', \n", " fill_alpha=0.7, \n", " fill_color={'field': 'membres', 'transform': cmap},\n", " line_color='black', \n", " line_width=0.5, \n", " source=geo_src\n", ")\n", "\n", "# set up mouse hover informations\n", "hover = p.select_one(bm.HoverTool)\n", "hover.point_policy = 'follow_mouse'\n", "hover.tooltips = [\n", " ('Département:', '@NOM_DEPT'), \n", " (\"Membres:\", \"@membres\"), \n", " (\"Contact:\", \"??\"), \n", " (\"Afpyro:\", \"True/False\")\n", "]\n", "\n", "# add a color bar\n", "color_bar = bm.ColorBar(\n", " color_mapper=cmap,\n", " ticker=bm.LogTicker(),\n", " title_text_align=\"left\",\n", " location=(0, 0),\n", " border_line_color=None,\n", " title=\"Membres\"\n", ")\n", "p.add_layout(color_bar, 'right')\n", "\n", "# show plot\n", "show(p)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from bokeh.plotting import output_file\n", "output_file(\"afpy_france.html\")" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda env:geopandas]", "language": "python", "name": "conda-env-geopandas-py" }, "language_info": { 
"codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }