{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from lets_plot import *\n",
"from lets_plot.geo_data import *\n",
"\n",
"from lets_plot.settings_utils import geocoding_service\n",
"#LetsPlot.set(geocoding_service(url='http://3.86.228.157:3025'))\n",
"\n",
"import pandas as pd\n",
"\n",
"LetsPlot.setup_html()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" State_Code | \n",
" State_Name | \n",
" State_ab | \n",
" County | \n",
" City | \n",
" Place | \n",
" Type | \n",
" Primary | \n",
" Zip_Code | \n",
" Area_Code | \n",
" ALand | \n",
" AWater | \n",
" Lat | \n",
" Lon | \n",
" Mean | \n",
" Median | \n",
" Stdev | \n",
" sum_w | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 1011000 | \n",
" 1 | \n",
" Alabama | \n",
" AL | \n",
" Mobile County | \n",
" Chickasaw | \n",
" Chickasaw city | \n",
" City | \n",
" place | \n",
" 36611 | \n",
" 251 | \n",
" 10894952 | \n",
" 909156 | \n",
" 30.771450 | \n",
" -88.079697 | \n",
" 38773 | \n",
" 30506 | \n",
" 33101 | \n",
" 1638.260513 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1011010 | \n",
" 1 | \n",
" Alabama | \n",
" AL | \n",
" Barbour County | \n",
" Louisville | \n",
" Clio city | \n",
" City | \n",
" place | \n",
" 36048 | \n",
" 334 | \n",
" 26070325 | \n",
" 23254 | \n",
" 31.708516 | \n",
" -85.611039 | \n",
" 37725 | \n",
" 19528 | \n",
" 43789 | \n",
" 258.017685 | \n",
"
\n",
" \n",
" | 2 | \n",
" 1011020 | \n",
" 1 | \n",
" Alabama | \n",
" AL | \n",
" Shelby County | \n",
" Columbiana | \n",
" Columbiana city | \n",
" City | \n",
" place | \n",
" 35051 | \n",
" 205 | \n",
" 44835274 | \n",
" 261034 | \n",
" 33.191452 | \n",
" -86.615618 | \n",
" 54606 | \n",
" 31930 | \n",
" 57348 | \n",
" 926.031000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id State_Code State_Name State_ab County City \\\n",
"0 1011000 1 Alabama AL Mobile County Chickasaw \n",
"1 1011010 1 Alabama AL Barbour County Louisville \n",
"2 1011020 1 Alabama AL Shelby County Columbiana \n",
"\n",
" Place Type Primary Zip_Code Area_Code ALand AWater \\\n",
"0 Chickasaw city City place 36611 251 10894952 909156 \n",
"1 Clio city City place 36048 334 26070325 23254 \n",
"2 Columbiana city City place 35051 205 44835274 261034 \n",
"\n",
" Lat Lon Mean Median Stdev sum_w \n",
"0 30.771450 -88.079697 38773 30506 33101 1638.260513 \n",
"1 31.708516 -85.611039 37725 19528 43789 258.017685 \n",
"2 33.191452 -86.615618 54606 31930 57348 926.031000 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"income_all = pd.read_csv('https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/US_household_income_2017.csv', encoding='latin-1')\n",
"income_all.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" State_Name | \n",
" Mean | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Alabama | \n",
" 53612.925856 | \n",
"
\n",
" \n",
" | 1 | \n",
" Alaska | \n",
" 77670.209524 | \n",
"
\n",
" \n",
" | 2 | \n",
" Arizona | \n",
" 62578.071313 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" State_Name Mean\n",
"0 Alabama 53612.925856\n",
"1 Alaska 77670.209524\n",
"2 Arizona 62578.071313"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"income_by_state = income_all.groupby(\"State_Name\", as_index=False)[\"Mean\"].mean()\n",
"income_by_state.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" State_Name | \n",
" County | \n",
" Mean | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" Alabama | \n",
" Autauga County | \n",
" 53735.557235 | \n",
"
\n",
" \n",
" | 1 | \n",
" Alabama | \n",
" Barbour County | \n",
" 37725.000000 | \n",
"
\n",
" \n",
" | 2 | \n",
" Alabama | \n",
" Blount County | \n",
" 55127.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" State_Name County Mean\n",
"0 Alabama Autauga County 53735.557235\n",
"1 Alabama Barbour County 37725.000000\n",
"2 Alabama Blount County 55127.000000"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"income_by_county = income_all.groupby([\"State_Name\",\"County\"], as_index=False)[\"Mean\"].mean()\n",
"income_by_county.head(3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# `regions_state` is not exported by the installed lets_plot.geo_data (calling it raised\n",
"# NameError), so the state list is obtained through `geocode_states` instead.\n",
"# The 'found name' column holds the state names resolved for the 'us-48' request.\n",
"us48 = geocode_states('us-48').get_geocodes()['found name'].tolist()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the county rows whose state name appears in `us48`, then report how many remain.\n",
"data = income_by_county[income_by_county.State_Name.isin(us48)]\n",
"row_count = len(data)\n",
"print(row_count)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Geocode each county name together with its parent state name (lists are aligned row by row).\n",
"counties = (\n",
"    regions_builder2(\n",
"        'county',\n",
"        names=data[\"County\"].tolist(),\n",
"        states=data[\"State_Name\"].tolist(),\n",
"    )\n",
"    .drop_not_matched()\n",
"    .build()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show the geocoding result in tabular form.\n",
"counties.to_data_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Request centroids for the geocoded counties and display them.\n",
"centroids = counties.centroids()\n",
"centroids"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# map_join lacks multi-key support, so the income data is attached with pandas merge\n",
"# on the (county, state) pair.\n",
"data_with_geometry = centroids.merge(\n",
"    data,\n",
"    left_on=['request', 'state'],\n",
"    right_on=['County', 'State_Name'],\n",
")\n",
"data_with_geometry"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plot the merged centroid rows as points colored by the `Mean` column.\n",
"ggplot() + geom_point(aes(color='Mean'), data=data_with_geometry)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Request boundaries for the geocoded counties and display them.\n",
"boundaries = counties.boundaries()\n",
"boundaries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# map_join lacks multi-key support, so the income data is attached with pandas merge\n",
"# on the (county, state) pair.\n",
"data_with_boundaries = boundaries.merge(\n",
"    data,\n",
"    left_on=['request', 'state'],\n",
"    right_on=['County', 'State_Name'],\n",
")\n",
"data_with_boundaries"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Theme that blanks all axis decorations, combined with a fixed 900x400 plot size.\n",
"map_theme = (\n",
"    theme(axis_line=\"blank\", axis_text=\"blank\", axis_title=\"blank\", axis_ticks=\"blank\")\n",
"    + ggsize(900, 400)\n",
")\n",
"\n",
"# County polygons filled by the `Mean` column on a blue-to-pink gradient.\n",
"(\n",
"    ggplot()\n",
"    + geom_map(aes(fill='Mean'), data=data_with_boundaries)\n",
"    + scale_fill_gradient(low=\"#007BCD\", high=\"#FE0968\", name=\"Mean income\")\n",
"    + map_theme\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Issues\n",
"\n",
"The cells below reproduce problems observed with `regions_builder2`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# drop_not_found breaks parents - the parent columns are missing from the result\n",
"(\n",
"    regions_builder2(\n",
"        'county',\n",
"        names=['Wayne County', 'Not existing County', 'Anson County'],\n",
"        states=['New York', 'New York', 'North Carolina'],\n",
"        countries=['usa', 'usa', 'usa'],\n",
"    )\n",
"    .drop_not_found()\n",
"    .build()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Issue with parent geocoding: unexpected ranking behaviour results in broken responses.\n",
"# When multiple objects are found for one request, an ambiguous response is generated without ranking by weight.\n",
"# The ambiguous response is also broken: it comes back as a success response with the first namesake object ¯\\_(ツ)_/¯\n",
"(\n",
"    regions_builder2(\n",
"        'county',\n",
"        names=['Wayne County', 'Essex County'],\n",
"        states=['New York', 'Virginia'],\n",
"        countries=['usa', 'usa'],\n",
"    )\n",
"    .build()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The error message is not informative here: two names and two states, but only one country.\n",
"(\n",
"    regions_builder2(\n",
"        'county',\n",
"        names=['Wayne County', 'Essex County'],\n",
"        states=['New York', 'Virginia'],\n",
"        countries=['usa'],\n",
"    )\n",
"    .build()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Passing already-built regions as a parent is not yet supported.\n",
"state_regions = regions_builder2(\n",
"    'state',\n",
"    names=data[\"State_Name\"].tolist(),\n",
"    countries=['uSa'] * row_count,\n",
").build()\n",
"\n",
"counties_via_regions = (\n",
"    regions_builder2('county', names=data[\"County\"].tolist(), states=state_regions)\n",
"    .drop_not_matched()\n",
"    .build()\n",
")\n",
"counties_via_regions.to_data_frame()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# State-level lookup of 'florida' with the scope set to 'Uruguay'.\n",
"(\n",
"    regions_builder2('state', names=['florida'], scope='Uruguay')\n",
"    .build()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# The same state name requested twice, each time with a different parent country.\n",
"(\n",
"    regions_builder2(\n",
"        'state',\n",
"        names=['florida', 'florida'],\n",
"        countries=['usa', 'Uruguay'],\n",
"    )\n",
"    .build()\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 1
}