{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).\n" ] }, { "data": { "text/html": [ "\n", " \n", " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from lets_plot import *\n", "from lets_plot.geo_data import *\n", "\n", "from lets_plot.settings_utils import geocoding_service\n", "#LetsPlot.set(geocoding_service(url='http://3.86.228.157:3025'))\n", "\n", "import pandas as pd\n", "\n", "LetsPlot.setup_html()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idState_CodeState_NameState_abCountyCityPlaceTypePrimaryZip_CodeArea_CodeALandAWaterLatLonMeanMedianStdevsum_w
010110001AlabamaALMobile CountyChickasawChickasaw cityCityplace366112511089495290915630.771450-88.0796973877330506331011638.260513
110110101AlabamaALBarbour CountyLouisvilleClio cityCityplace36048334260703252325431.708516-85.611039377251952843789258.017685
210110201AlabamaALShelby CountyColumbianaColumbiana cityCityplace350512054483527426103433.191452-86.615618546063193057348926.031000
\n", "
" ], "text/plain": [ " id State_Code State_Name State_ab County City \\\n", "0 1011000 1 Alabama AL Mobile County Chickasaw \n", "1 1011010 1 Alabama AL Barbour County Louisville \n", "2 1011020 1 Alabama AL Shelby County Columbiana \n", "\n", " Place Type Primary Zip_Code Area_Code ALand AWater \\\n", "0 Chickasaw city City place 36611 251 10894952 909156 \n", "1 Clio city City place 36048 334 26070325 23254 \n", "2 Columbiana city City place 35051 205 44835274 261034 \n", "\n", " Lat Lon Mean Median Stdev sum_w \n", "0 30.771450 -88.079697 38773 30506 33101 1638.260513 \n", "1 31.708516 -85.611039 37725 19528 43789 258.017685 \n", "2 33.191452 -86.615618 54606 31930 57348 926.031000 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read the household income CSV (decoded as latin-1) and preview the first rows.\n", "income_all = pd.read_csv(\n", "    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/US_household_income_2017.csv',\n", "    encoding='latin-1',\n", ")\n", "income_all.head(3)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
State_NameMean
0Alabama53612.925856
1Alaska77670.209524
2Arizona62578.071313
\n", "
" ], "text/plain": [ " State_Name Mean\n", "0 Alabama 53612.925856\n", "1 Alaska 77670.209524\n", "2 Arizona 62578.071313" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Average the per-row `Mean` column within each state; `State_Name` stays a regular column.\n", "income_by_state = income_all.groupby(\"State_Name\", as_index=False).agg({\"Mean\": \"mean\"})\n", "income_by_state.head(3)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
State_NameCountyMean
0AlabamaAutauga County53735.557235
1AlabamaBarbour County37725.000000
2AlabamaBlount County55127.000000
\n", "
" ], "text/plain": [ " State_Name County Mean\n", "0 Alabama Autauga County 53735.557235\n", "1 Alabama Barbour County 37725.000000\n", "2 Alabama Blount County 55127.000000" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "income_by_county = income_all.groupby([\"State_Name\",\"County\"], as_index=False)[\"Mean\"].mean()\n", "income_by_county.head(3)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Names of the 48 contiguous US states as resolved by the geocoding service.\n", "# `regions_state` is not available in lets_plot.geo_data any more (it raised NameError here);\n", "# `geocode_states(...).get_geocodes()` is used instead.\n", "us48 = geocode_states('us-48').get_geocodes()['found name'].tolist()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Keep only the counties whose state is one of the 48 contiguous states.\n", "data = income_by_county[income_by_county.State_Name.isin(us48)]\n", "row_count = len(data)\n", "row_count" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Geocode every county together with its parent state.\n", "# `ignore_all_errors()` takes the place of the old `drop_not_matched()`: requests that cannot be resolved are skipped.\n", "counties = (\n", "    geocode('county',\n", "            names=data[\"County\"].tolist(),\n", "            states=data[\"State_Name\"].tolist())\n", "    .ignore_all_errors()\n", ")" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "counties.get_geocodes().head()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "centroids = counties.get_centroids()\n", "centroids.head()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Join income data to the centroids on the (county, state) pair with pandas.merge.\n", "# NOTE(review): this was written because map_join lacked multi-key support - confirm whether that still holds.\n", "data_with_geometry = centroids.merge(data, left_on=['county', 'state'], right_on=['County', 'State_Name'])\n", "data_with_geometry.head()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ggplot() + geom_point(aes(color='Mean'), data_with_geometry)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "boundaries = counties.get_boundaries()\n", "boundaries.head()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Join income data to the boundaries on the (county, state) pair with pandas.merge.\n", "# NOTE(review): this was written because map_join lacked multi-key support - confirm whether that still holds.\n", "data_with_boundaries = boundaries.merge(data, left_on=['county', 'state'], right_on=['County', 'State_Name'])\n", "data_with_boundaries.head()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "map_theme = theme(axis_line=\"blank\", axis_text=\"blank\", axis_title=\"blank\", axis_ticks=\"blank\") + ggsize(900, 400)\n", "ggplot() + geom_map(aes(fill='Mean'), data_with_boundaries) + scale_fill_gradient(low=\"#007BCD\", high=\"#FE0968\", name=\"Mean income\") + map_theme" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Issues\n", "\n", "The cells below were originally written against the removed `regions_builder2` API and have been ported to `geocode`. Re-check whether each described problem still reproduces." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Ignoring not-found names (formerly drop_not_found) breaks parents - these columns are missing.\n", "geocode('county',\n", "        names=['Wayne County', 'Not existing County', 'Anson County'],\n", "        states=['New York', 'New York', 'North Carolina'],\n", "        countries=['usa', 'usa', 'usa'])\\\n", "    .ignore_not_found()\\\n", "    .get_geocodes()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Issue with parent geocoding: unexpected ranking behaviour results in broken responses.\n", "# When multiple objects are found for one request, an ambiguous response is generated without ranking by weight.\n", "# The ambiguous response is also broken - it returns a success response with the first namesake object ¯\\_(ツ)_/¯\n", "geocode('county',\n", "        names=['Wayne County', 'Essex County'],\n", "        states=['New York', 'Virginia'],\n", "        countries=['usa', 'usa'])\\\n", "    .get_geocodes()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Error message is not informative (`countries` has fewer items than `names`).\n", "geocode('county',\n", "        names=['Wayne County', 'Essex County'],\n", "        states=['New York', 'Virginia'],\n", "        countries=['usa'])\\\n", "    .get_geocodes()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Passing geocoded regions as a parent was not yet supported.\n", "state_regions = geocode('state', names=data[\"State_Name\"].tolist(), countries=['uSa'] * row_count)\n", "counties_via_regions = (\n", "    geocode('county',\n", "            names=data[\"County\"].tolist(),\n", "            states=state_regions)\n", "    .ignore_all_errors()\n", ")\n", "counties_via_regions.get_geocodes().head()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "geocode('state', names=['florida'], scope='Uruguay').get_geocodes()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "geocode('state', names=['florida', 'florida'], countries=['usa', 'Uruguay']).get_geocodes()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": 
".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 1 }