{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Breweries in the United States in 2011" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "
" ], "text/plain": [ "" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.display import HTML\n", "\n", "HTML('''\n", "
''')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The following shows the number of breweries in the United States in 2011 per state, along with breweries per capita per state and breweries per sq mi per state. Population data was taken from the 2010 census." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd\n", "population = pd.read_csv('./../data/pop_density.csv', skiprows=3, index_col=0)['2010_POPULATION'].to_dict()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "area_string = '''Alaska\t1\t663,267 sq mi\t1,717,854 sq km\n", "Texas\t2\t268,580 sq mi\t695,621 sq km\n", "California\t3\t163,695 sq mi\t423,970 sq km\n", "Montana\t4\t147,042 sq mi\t380,838 sq km\n", "New Mexico\t5\t121,589 sq mi\t314,915 sq km\n", "Arizona\t6\t113,998 sq mi\t295,254 sq km\n", "Nevada\t7\t110,560 sq mi\t286,351 sq km\n", "Colorado\t8\t104,093 sq mi\t269,601 sq km\n", "Oregon\t9\t98,380 sq mi\t254,805 sq km\n", "Wyoming\t10\t97,813 sq mi\t253,336 sq km\n", "Michigan\t11\t96,716 sq mi\t250,494 sq km\n", "Minnesota\t12\t86,938 sq mi\t225,171 sq km\n", "Utah\t13\t84,898 sq mi\t219,887 sq km\n", "Idaho\t14\t83,570 sq mi\t216,446 sq km\n", "Kansas\t15\t82,276 sq mi\t213,096 sq km\n", "Nebraska\t16\t77,358 sq mi\t200,356 sq km\n", "South Dakota\t17\t77,121 sq mi\t199,742 sq km\n", "Washington\t18\t71,300 sq mi\t184,665 sq km\n", "North Dakota\t19\t70,700 sq mi\t183,112 sq km\n", "Oklahoma\t20\t69,899 sq mi\t181,035 sq km\n", "Missouri\t21\t69,704 sq mi\t180,533 sq km\n", "Florida\t22\t65,755 sq mi\t170,304 sq km\n", "Wisconsin\t23\t65,498 sq mi\t169,639 sq km\n", "Georgia\t24\t59,425 sq mi\t153,909 sq km\n", "Illinois\t25\t57,914 sq mi\t149,998 sq km\n", "Iowa\t26\t56,271 sq mi\t145,743 sq km\n", "New York\t27\t54,556 sq mi\t141,299 sq km\n", "North Carolina\t28\t53,818 sq mi\t139,389 sq km\n", "Arkansas\t29\t53,179 sq mi\t137,732 sq km\n", 
"Alabama\t30\t52,419 sq mi\t135,765 sq km\n", "Louisiana\t31\t51,840 sq mi\t134,264 sq km\n", "Mississippi\t32\t48,431 sq mi\t125,434 sq km\n", "Pennsylvania\t33\t46,056 sq mi\t119,283 sq km\n", "Ohio\t34\t44,825 sq mi\t116,096 sq km\n", "Virginia\t35\t42,774 sq mi\t110,785 sq km\n", "Tennessee\t36\t42,144 sq mi\t109,151 sq km\n", "Kentucky\t37\t40,410 sq mi\t104,659 sq km\n", "Indiana\t38\t36,418 sq mi\t94,321 sq km\n", "Maine\t39\t35,385 sq mi\t91,646 sq km\n", "South Carolina\t40\t32,020 sq mi\t82,932 sq km\n", "West Virginia\t41\t24,230 sq mi\t62,755 sq km\n", "Maryland\t42\t12,407 sq mi\t32,133 sq km\n", "Hawaii\t43\t10,931 sq mi\t28,311 sq km\n", "Massachusetts\t44\t10,555 sq mi\t27,336 sq km\n", "Vermont\t45\t9,615 sq mi\t24,901 sq km\n", "New Hampshire\t46\t9,350 sq mi\t24,216 sq km\n", "New Jersey\t47\t8,722 sq mi\t22,588 sq km\n", "Connecticut\t48\t5,544 sq mi\t14,357 sq km\n", "Delaware\t49\t2,489 sq mi\t6,447 sq km\n", "Rhode Island\t50\t1,545 sq mi\t4,002 sq km\n", "District of Columbia\t51\t68.25 sq mi\t176.75 sq km'''\n", "\n", "area_string = area_string.replace('sq mi', '').replace('sq km', '')\n", "\n", "area = dict()\n", "\n", "for l in area_string.splitlines():\n", " data = l.split()\n", " size = int(float(data[-2].replace(',', '')))\n", " name = ' '.join(data[0:-3])\n", " area[name] = size" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\n", "\n", "df = pd.read_csv('./../data/breweries.csv', index_col=0)\n", "\n", "df.columns\n", "\n", "us_df = df[df['country']=='United States'].copy(deep=True)\n", "\n", "us_df.loc[471, 'state'] = 'Florida'\n", "\n", "us_df = us_df[~ us_df['state'].isnull()]\n", "\n", "us_df.loc[1393, 'state'] = 'Maine'\n", "us_df.loc[1397, 'state'] = 'Kansas'\n", "us_df.loc[1398, 'state'] = 'Illinois'\n", "us_df.loc[1399, 'state'] = 'New Jersey'\n", "us_df.loc[1402, 'state'] = 'New York'\n", "us_df.loc[1404, 'state'] = 'Missouri'\n", "us_df.loc[[1407, 1413], 
'state'] = 'North Carolina'\n", "us_df.loc[[1409], 'state'] = 'Ohio'\n", "us_df.loc[[1410], 'state'] = 'Wisconsin'\n", "us_df.loc[[1411], 'state'] = 'Massachusetts'\n", "us_df.loc[[1416], 'state'] = 'Michigan'\n", "us_df.loc[[1417, 1418], 'state'] = 'Oregon'\n", "us_df.loc[[1420], 'state'] = 'California'\n", "us_df.loc[[1421], 'state'] = 'District of Columbia'\n", "\n", "us_df = us_df[us_df['state'] != 'Virgin Islands']\n", "us_df = us_df[us_df['state'] != 'District of Columbia']\n", "\n", "\n", "states = dict()\n", "for s in sorted(us_df['state'].unique()):\n", " states[s] = dict()\n", " states[s]['area'] = area[s]\n", " states[s]['population'] = population[s]\n", " states[s]['count'] = len(us_df[us_df['state'] == s])\n", "\n", "\n", "states_df = pd.DataFrame(states).T\n", "\n", "states_df['per_capita'] = states_df['count']/states_df['population']\n", "states_df['per_sqmiles'] = states_df['count']/states_df['area']\n", "states_df['states'] = states_df.index\n", "states_df = states_df.sort_values('count', ascending=False)\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
areacountpopulationper_capitaper_sqmilesstates
California163695128372539560.0000030.000782California
Wisconsin654988256869860.0000140.001252Wisconsin
Colorado1040936050291960.0000120.000576Colorado
Pennsylvania4605653127023790.0000040.001151Pennsylvania
Illinois5791437128306320.0000030.000639Illinois
\n", "
" ], "text/plain": [ " area count population per_capita per_sqmiles states\n", "California 163695 128 37253956 0.000003 0.000782 California\n", "Wisconsin 65498 82 5686986 0.000014 0.001252 Wisconsin\n", "Colorado 104093 60 5029196 0.000012 0.000576 Colorado\n", "Pennsylvania 46056 53 12702379 0.000004 0.001151 Pennsylvania\n", "Illinois 57914 37 12830632 0.000003 0.000639 Illinois" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "states_df.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true }, "outputs": [], "source": [ "northeast = ['New Jersey', 'New York', 'Pennsylvania', 'Connecticut', 'Maine', 'Massachusetts', 'New Hampshire', 'Rhode Island', 'Vermont']\n", "midwest = ['Illinois', 'Indiana', 'Michigan', 'Ohio', 'Wisconsin', 'Iowa', 'Kansas', 'Minnesota', 'Missouri', 'Nebraska', 'North Dakota', 'South Dakota']\n", "south = ['Delaware', 'Florida', 'Georgia', 'Maryland', 'North Carolina', 'South Carolina', 'Virginia', 'District of Columbia', 'West Virginia', 'Alabama', 'Kentucky', 'Mississippi', 'Tennessee', \n", " 'Arkansas', 'Louisiana', 'Oklahoma', 'Texas']\n", "west = ['Arizona', 'Colorado', 'Idaho', 'Montana', 'Nevada', 'New Mexico', 'Utah', 'Wyoming',\n", " 'Alaska', 'California', 'Hawaii', 'Oregon', 'Washington']" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def f(v):\n", " if v in northeast:\n", " return 'northeast'\n", " elif v in midwest:\n", " return 'midwest'\n", " elif v in south:\n", " return 'south'\n", " elif v in west:\n", " return 'west'\n", " else:\n", " return 'other'\n", "\n", "states_df['region'] = states_df['states'].apply(f)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "states_df['population_format'] = states_df['population'].apply(lambda x: \"{:,}\".format(x))\n", "states_df['area_format'] = states_df['area'].apply(lambda 
x: \"{:,}\".format(x))\n", "\n", "states_df['x'] = range(0, len(states_df.index))\n", "states_df['states'] = states_df.index\n", "states_df['count_by_2'] = states_df['count']/2\n", "states_df['color'] = '#1F77B4'\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " Loading BokehJS ...\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(global) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", "\n", " if (typeof (window._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n", " window._bokeh_onload_callbacks = [];\n", " window._bokeh_is_loading = undefined;\n", " }\n", "\n", "\n", " \n", " if (typeof (window._bokeh_timeout) === \"undefined\" || force === true) {\n", " window._bokeh_timeout = Date.now() + 5000;\n", " window._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"

\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"

\\n\"+\n", " \"\\n\"+\n", " \"\\n\"+\n", " \"from bokeh.resources import INLINE\\n\"+\n", " \"output_notebook(resources=INLINE)\\n\"+\n", " \"\\n\"+\n", " \"
\"}};\n", "\n", " function display_loaded() {\n", " if (window.Bokeh !== undefined) {\n", " document.getElementById(\"37cf967c-ee27-439a-8dfc-5a930d4e97b4\").textContent = \"BokehJS successfully loaded.\";\n", " } else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(display_loaded, 100)\n", " }\n", " }\n", "\n", " function run_callbacks() {\n", " window._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", " delete window._bokeh_onload_callbacks\n", " console.info(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(js_urls, callback) {\n", " window._bokeh_onload_callbacks.push(callback);\n", " if (window._bokeh_is_loading > 0) {\n", " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " window._bokeh_is_loading = js_urls.length;\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " var s = document.createElement('script');\n", " s.src = url;\n", " s.async = false;\n", " s.onreadystatechange = s.onload = function() {\n", " window._bokeh_is_loading--;\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", " run_callbacks()\n", " }\n", " };\n", " s.onerror = function() {\n", " console.warn(\"failed to load library \" + url);\n", " };\n", " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", " }\n", " };var element = document.getElementById(\"37cf967c-ee27-439a-8dfc-5a930d4e97b4\");\n", " if (element == null) {\n", " console.log(\"Bokeh: ERROR: autoload.js configured with elementid '37cf967c-ee27-439a-8dfc-5a930d4e97b4' but no matching script tag was found. 
\")\n", " return false;\n", " }\n", "\n", " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.4.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.4.min.js\"];\n", "\n", " var inline_js = [\n", " function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", " \n", " function(Bokeh) {\n", " \n", " document.getElementById(\"37cf967c-ee27-439a-8dfc-5a930d4e97b4\").textContent = \"BokehJS is loading...\";\n", " },\n", " function(Bokeh) {\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.4.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.4.min.css\");\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.4.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.4.min.css\");\n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " \n", " if ((window.Bokeh !== undefined) || (force === true)) {\n", " for (var i = 0; i < inline_js.length; i++) {\n", " inline_js[i](window.Bokeh);\n", " }if (force === true) {\n", " display_loaded();\n", " }} else if (Date.now() < window._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!window._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " window._bokeh_failed_load = true;\n", " } else if (force !== true) {\n", " var cell = $(document.getElementById(\"37cf967c-ee27-439a-8dfc-5a930d4e97b4\")).parents('.cell').data().cell;\n", " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", " }\n", "\n", " }\n", "\n", " if (window._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(js_urls, function() {\n", " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(this));" ] }, "metadata": {}, 
"output_type": "display_data" } ], "source": [ "import bokeh\n", "from bokeh.models import ( ColumnDataSource, HoverTool, Circle, CategoricalColorMapper,\n", " LinearInterpolator, Row, \n", " CategoricalTickFormatter, CustomJS,\n", " Rect, )\n", "from bokeh.plotting import figure\n", "from bokeh.io import output_notebook, show\n", "output_notebook()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Number of breweries per state, and breweries per capita vs. breweries per sq. mile" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "source = ColumnDataSource(data=states_df)\n", "size_mapper = LinearInterpolator(x=[states_df['population'].min(), states_df['population'].max()], y=[2, 10])\n", "\n", "hover1 = HoverTool(\n", " tooltips=\"\"\"\n", "
\n", "
\n", " [@index]\n", "
\n", "
\n", " Population: @population_format\n", "
\n", "
\n", " Area(sq mi): @area_format\n", "
\n", "
\n", " \"\"\",\n", " )\n", "\n", "\n", "p1 = figure(width=475, height=400, tools='pan,box_select,box_zoom,reset')\n", "p1.x_range.bounds = (0, 3e-5)\n", "p1.x_range.end = 3e-5\n", "p1.y_range.bounds = (0, 3e-3)\n", "p1.y_range.end = 3e-3\n", "circle = Circle(x='per_capita', y='per_sqmiles',\n", " radius_units='screen', fill_color='color',\n", " radius={'field':'population', 'transform': size_mapper},)\n", "p1.add_glyph(source, circle)\n", "p1.xaxis.axis_label = 'Breweries Per Capita'\n", "p1.yaxis.axis_label = 'Breweries Per Sq Mile'\n", "\n", "hover2 = HoverTool(\n", " tooltips=\"\"\"\n", "
\n", "
\n", " [@states]\n", "
\n", "
\n", " Count: @count\n", "
\n", "
\n", " \"\"\",\n", " )\n", "\n", "p2 = figure(width=475, height=400, x_range=list(states_df.index), tools='pan,box_select,box_zoom,reset')\n", "rect = Rect(x='states', y='count_by_2', width=1, height='count', fill_color='color')\n", "p2.y_range.bounds = (0, 150)\n", "p2.y_range.start = 0\n", "p2.xaxis.major_label_orientation = 3.14 / 3\n", "p2.x_range.bounds = list(states_df.index)\n", "p2.add_glyph(source, rect)\n", "p2.yaxis.axis_label = 'Number of breweries'\n", "\n", "\n", "callback1 = CustomJS(args=dict(source=source, hover=hover2), code=\"\"\"\n", "\n", "for (i=0; i < source.data.color.length; i++) {\n", " source.data.color[i] = '#1F77B4'\n", "}\n", "\n", "var indices = cb_data.index['1d'].indices;\n", "for (i=0; i < indices.length; i++) {\n", " ind0 = indices[i]\n", " source.data.color[ind0] = '#b4531f' \n", "}\n", "\n", "source.trigger('change')\n", "\n", "\"\"\")\n", "\n", "\n", "callback2 = CustomJS(args=dict(source=source, hover=hover1), code=\"\"\"\n", "\n", "for (i=0; i < source.data.color.length; i++) {\n", " source.data.color[i] = '#1F77B4'\n", "}\n", "\n", "var indices = cb_data.index['1d'].indices;\n", "for (i=0; i < indices.length; i++) {\n", " ind0 = indices[i]\n", " source.data.color[ind0] = '#b4531f' \n", "\n", "}\n", "\n", "source.trigger('change')\n", "\n", "\"\"\")\n", "\n", "\n", "hover1.callback = callback1\n", "hover2.callback = callback2\n", "\n", "\n", "p1.add_tools(hover1)\n", "p2.add_tools(hover2)\n", "\n", "\n", "p = Row(p2, p1)\n", "\n", "show(p)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Breweries by region" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import geopandas as gpd\n", "import numpy as np\n", "\n", "def getXYCoords(geometry, coord_type):\n", " \"\"\" Returns either x or y coordinates from geometry coordinate sequence. 
Used with LineString and Polygon geometries.\"\"\"\n", " if coord_type == 'x':\n", " return geometry.coords.xy[0]\n", " elif coord_type == 'y':\n", " return geometry.coords.xy[1]\n", "\n", "def getPolyCoords(geometry, coord_type):\n", " \"\"\" Returns Coordinates of Polygon using the Exterior of the Polygon.\"\"\"\n", " ext = geometry.exterior\n", " return getXYCoords(ext, coord_type)\n", " \n", "def getLineCoords(geometry, coord_type):\n", " \"\"\" Returns Coordinates of Linestring object.\"\"\"\n", " return getXYCoords(geometry, coord_type)\n", "\n", "def getPointCoords(geometry, coord_type):\n", " \"\"\" Returns Coordinates of Point object.\"\"\"\n", " if coord_type == 'x':\n", " return geometry.x\n", " elif coord_type == 'y':\n", " return geometry.y\n", " \n", "def multiGeomHandler(multi_geometry, coord_type, geom_type):\n", " \"\"\" \n", " Function for handling multi-geometries. Can be MultiPoint, MultiLineString or MultiPolygon. \n", " Returns a list of coordinates where all parts of Multi-geometries are merged into a single list. \n", " Individual geometries are separated with np.nan which is how Bokeh wants them. 
\n", " # Bokeh documentation regarding the Multi-geometry issues can be found here (it is an open issue)\n", " # https://github.com/bokeh/bokeh/issues/2321\n", " \"\"\"\n", " \n", " for i, part in enumerate(multi_geometry):\n", " # On the first part of the Multi-geometry initialize the coord_array (np.array)\n", " if i == 0:\n", " if geom_type == \"MultiPoint\":\n", " coord_arrays = np.append(getPointCoords(part, coord_type), np.nan)\n", " elif geom_type == \"MultiLineString\":\n", " coord_arrays = np.append(getLineCoords(part, coord_type), np.nan)\n", " elif geom_type == \"MultiPolygon\":\n", " coord_arrays = np.append(getPolyCoords(part, coord_type), np.nan)\n", " else:\n", " if geom_type == \"MultiPoint\":\n", " coord_arrays = np.concatenate([coord_arrays, np.append(getPointCoords(part, coord_type), np.nan)])\n", " elif geom_type == \"MultiLineString\":\n", " coord_arrays = np.concatenate([coord_arrays, np.append(getLineCoords(part, coord_type), np.nan)])\n", " elif geom_type == \"MultiPolygon\":\n", " coord_arrays = np.concatenate([coord_arrays, np.append(getPolyCoords(part, coord_type), np.nan)])\n", " \n", " # Return the coordinates \n", " return coord_arrays\n", " \n", "\n", "def getCoords(row, geom_col, coord_type):\n", " \"\"\"\n", " Returns coordinates ('x' or 'y') of a geometry (Point, LineString or Polygon) as a list (if geometry is LineString or Polygon). 
\n", " Can handle also MultiGeometries.\n", " \"\"\"\n", " # Get geometry\n", " geom = row[geom_col]\n", " \n", " # Check the geometry type\n", " gtype = geom.geom_type\n", " \n", " # \"Normal\" geometries\n", " # -------------------\n", " \n", " if gtype == \"Point\":\n", " return getPointCoords(geom, coord_type)\n", " elif gtype == \"LineString\":\n", " return list( getLineCoords(geom, coord_type) )\n", " elif gtype == \"Polygon\":\n", " return list( getPolyCoords(geom, coord_type) )\n", " \n", " # Multi geometries\n", " # ----------------\n", " \n", " else:\n", " return list( multiGeomHandler(geom, coord_type, gtype) ) \n", " \n", "data = gpd.read_file('../data/states_21basic/states.shp')\n", "\n", "data = data.to_crs(crs=data.crs)\n", "\n", "data['geom_x'] = data.apply(getCoords, geom_col=\"geometry\", coord_type=\"x\", axis=1)\n", "data['geom_y'] = data.apply(getCoords, geom_col=\"geometry\", coord_type=\"y\", axis=1)\n", "\n", "data = data.drop('geometry', axis=1)\n", "plot_df = pd.merge(data, states_df, left_on='STATE_NAME', right_index=True)\n", "dfsource = ColumnDataSource(data=plot_df)\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "WIDTH = 900\n", "TOOLS = \"pan,wheel_zoom,box_zoom,reset,save\"\n", "p = figure(width=int(WIDTH), height=int(WIDTH/1.5),\n", " title=\"\", tools=TOOLS,\n", " x_axis_location=None, y_axis_location=None\n", ")\n", "\n", "palette = ['#a6cee3',\n", "'#1f78b4',\n", "'#b2df8a',\n", "'#33a02c']\n", "\n", "color_mapper = CategoricalColorMapper(factors=list(states_df['region'].unique()), palette=palette)\n", "# alpha_mapper = LinearInterpolator(x=[states_df['count'].min(), states_df['count'].max()], y=[.1, 1])\n", "\n", "patches = p.patches('geom_x', 'geom_y', \n", " source=dfsource, name='Name',\n", " fill_color={'field': 'region', 'transform': color_mapper}, line_color=\"white\", line_width=0.5)\n", "\n", "hover = HoverTool(renderers=[patches])\n", "hover.tooltips=[(\"Name\", \"@states\"),(\"Count\", \"@count\")]\n", "\n", "p.add_tools(hover)\n", "\n", "show(p)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "
\n", "
\n", "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "\n", "source = ColumnDataSource(data=states_df)\n", "\n", "hover1 = HoverTool(\n", " tooltips=\"\"\"\n", "
\n", "
\n", " [@index]\n", "
\n", "
\n", " Population: @population_format\n", "
\n", "
\n", " Area(sq mi): @area_format\n", "
\n", "
\n", " \"\"\",\n", " )\n", "\n", "\n", "p1 = figure(width=475, height=400, tools='pan,box_select,box_zoom,reset')\n", "p1.x_range.bounds = (0, 3e-5)\n", "p1.x_range.end = 3e-5\n", "p1.y_range.bounds = (0, 3e-3)\n", "p1.y_range.end = 3e-3\n", "circle = Circle(x='per_capita', y='per_sqmiles',\n", " radius_units='screen', fill_color={'field': 'region', 'transform': color_mapper},\n", " radius={'field':'population', 'transform': size_mapper},)\n", "p1.add_glyph(source, circle)\n", "p1.xaxis.axis_label = 'Breweries Per Capita'\n", "p1.yaxis.axis_label = 'Breweries Per Sq Mile'\n", "\n", "hover2 = HoverTool(\n", " tooltips=\"\"\"\n", "
\n", "
\n", " [@states]\n", "
\n", "
\n", " Count: @count\n", "
\n", "
\n", " \"\"\",\n", " )\n", "\n", "p2 = figure(width=475, height=400, x_range=list(states_df.index), tools='pan,box_select,box_zoom,reset')\n", "rect = Rect(x='states', y='count_by_2', width=1, height='count', fill_color={'field': 'region', 'transform': color_mapper},)\n", "p2.y_range.bounds = (0, 150)\n", "p2.y_range.start = 0\n", "p2.xaxis.major_label_orientation = 3.14 / 3\n", "p2.x_range.bounds = list(states_df.index)\n", "p2.add_glyph(source, rect)\n", "p2.yaxis.axis_label = 'Number of breweries'\n", "\n", "p1.add_tools(hover1)\n", "p2.add_tools(hover2)\n", "\n", "\n", "p = Row(p2, p1)\n", "\n", "show(p)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "##### States with the most breweries" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
count
California128
Wisconsin82
Colorado60
Pennsylvania53
Illinois37
\n", "
" ], "text/plain": [ " count\n", "California 128\n", "Wisconsin 82\n", "Colorado 60\n", "Pennsylvania 53\n", "Illinois 37" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame(states_df.sort_values('count', ascending=False).head(5)['count'])" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "###### States with highest breweries per capita" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
per_capita
Vermont0.000026
Wisconsin0.000014
Montana0.000012
Colorado0.000012
Alaska0.000011
\n", "
" ], "text/plain": [ " per_capita\n", "Vermont 0.000026\n", "Wisconsin 0.000014\n", "Montana 0.000012\n", "Colorado 0.000012\n", "Alaska 0.000011" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame(states_df.sort_values('per_capita', ascending=False).head(5)['per_capita'])" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "###### States with highest breweries per sq. mile" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
per_sqmiles
Delaware0.002411
New Jersey0.002178
Vermont0.001664
Massachusetts0.001611
Maryland0.001370
\n", "
" ], "text/plain": [ " per_sqmiles\n", "Delaware 0.002411\n", "New Jersey 0.002178\n", "Vermont 0.001664\n", "Massachusetts 0.001611\n", "Maryland 0.001370" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame(states_df.sort_values('per_sqmiles', ascending=False).head(5)['per_sqmiles'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 1 }