{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# LA County coronavirus vaccine administration by city/community" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A data analysis for a story in the L.A. Times, by Matt Stiles & Luke Money.\n", "\n", "Questions? [matt.stiles@latimes.com](mailto:matt.stiles@latimes.com)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load Python tools" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext lab_black" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", "import glob\n", "import path\n", "import os" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "pd.options.display.max_columns = 50\n", "pd.options.display.max_rows = 1000" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create a timeseries from the daily scrapes for the coronavirus tracker data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "path = \"/Users/mhustiles/data/github/coronavirus-tracker/_notebooks/data/raw/vaccines/los-angeles/\"\n", "all_files = glob.glob(os.path.join(path, \"*.csv\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read all the daily csv files, concatenate them and create a single new dataframe" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "df_from_each_file = (\n", " pd.read_csv(f, encoding=\"ISO-8859-1\", low_memory=False) for f in all_files\n", ")\n", "concatenated_df = pd.concat(df_from_each_file, ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "df = concatenated_df.copy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Strip out the strings placed in columns by the county health officials for small places" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "df = df[\n", " (df.vaccinations.str.strip() != \"Unreliable Data\")\n", " & (df.pop_2019.str.strip() != \"No Pop Data\")\n", " & (df.vaccinations_pct.str.strip() != \"Unreliable Data\")\n", " & (df.vaccinations_pct.str.strip() != \"No Pop Data\")\n", " & (df.vaccinations.str.strip() != \"<5\")\n", "]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merging on names is FUN" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "df[\"area\"] = df[\"area\"].str.strip(\" \").str.replace(\"City of \", \"\", regex=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Add the week of the year from the update date column" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "df[\"week\"] = pd.to_datetime(df[\"update_date\"]).dt.week" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
areavaccinationspop_2019vaccinations_pctupdate_dateweek
4259West Covina385838887343.42021-04-1215
4406Unincorporated - Angeles National Forest296109627.02021-04-1215
4404Unincorporated - Altadena172053681246.72021-04-1215
4403Unincorporated - Agua Dulce958373625.62021-04-1215
4402Unincorporated - Acton1652696123.72021-04-1215
4401Los Angeles - Woodland Hills273875728447.82021-04-1215
4400Los Angeles - Winnetka158214147138.12021-04-1215
4399Los Angeles - Wilshire Center133704059932.92021-04-1215
4398Los Angeles - Wilmington112364244926.52021-04-1215
4397Los Angeles - Wholesale District123323252837.92021-04-1215
\n", "
" ], "text/plain": [ " area vaccinations pop_2019 \\\n", "4259 West Covina 38583 88873 \n", "4406 Unincorporated - Angeles National Forest 296 1096 \n", "4404 Unincorporated - Altadena 17205 36812 \n", "4403 Unincorporated - Agua Dulce 958 3736 \n", "4402 Unincorporated - Acton 1652 6961 \n", "4401 Los Angeles - Woodland Hills 27387 57284 \n", "4400 Los Angeles - Winnetka 15821 41471 \n", "4399 Los Angeles - Wilshire Center 13370 40599 \n", "4398 Los Angeles - Wilmington 11236 42449 \n", "4397 Los Angeles - Wholesale District 12332 32528 \n", "\n", " vaccinations_pct update_date week \n", "4259 43.4 2021-04-12 15 \n", "4406 27.0 2021-04-12 15 \n", "4404 46.7 2021-04-12 15 \n", "4403 25.6 2021-04-12 15 \n", "4402 23.7 2021-04-12 15 \n", "4401 47.8 2021-04-12 15 \n", "4400 38.1 2021-04-12 15 \n", "4399 32.9 2021-04-12 15 \n", "4398 26.5 2021-04-12 15 \n", "4397 37.9 2021-04-12 15 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values(\"update_date\", ascending=False).head(10)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "df[\"area\"] = df[\"area\"].str.replace(\"City of\", \"\", regex=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Clean up data types" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "df[\"vaccinations\"] = df[\"vaccinations\"].astype(float)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "df[\"pop_2019\"] = df[\"pop_2019\"].astype(float)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "df[\"vaccinations_pct\"] = ((df[\"vaccinations\"] / df[\"pop_2019\"]) * 100).round(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Which week of the year? " ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "df[\"week\"] = pd.to_datetime(df[\"update_date\"]).dt.week" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "df[\"weekday\"] = pd.to_datetime(df[\"update_date\"]).dt.day_name()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Just get the updates each Monday" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "df = df[df[\"weekday\"] == \"Monday\"]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
areavaccinationspop_2019vaccinations_pctupdate_dateweekweekday
14637Culver City7445.032840.022.672021-03-019Monday
6981Culver City7445.032840.022.672021-03-0810Monday
15333Culver City9969.032840.030.362021-03-1511Monday
5937Culver City9969.032840.030.362021-03-2212Monday
14289Culver City13819.032840.042.082021-03-2913Monday
1761Culver City13819.032840.042.082021-04-0514Monday
4197Culver City17718.033638.052.672021-04-1215Monday
\n", "
" ], "text/plain": [ " area vaccinations pop_2019 vaccinations_pct update_date \\\n", "14637 Culver City 7445.0 32840.0 22.67 2021-03-01 \n", "6981 Culver City 7445.0 32840.0 22.67 2021-03-08 \n", "15333 Culver City 9969.0 32840.0 30.36 2021-03-15 \n", "5937 Culver City 9969.0 32840.0 30.36 2021-03-22 \n", "14289 Culver City 13819.0 32840.0 42.08 2021-03-29 \n", "1761 Culver City 13819.0 32840.0 42.08 2021-04-05 \n", "4197 Culver City 17718.0 33638.0 52.67 2021-04-12 \n", "\n", " week weekday \n", "14637 9 Monday \n", "6981 10 Monday \n", "15333 11 Monday \n", "5937 12 Monday \n", "14289 13 Monday \n", "1761 14 Monday \n", "4197 15 Monday " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df[\"area\"].str.contains(\"Culver City\")].sort_values(\n", " \"update_date\", ascending=True\n", ").head(100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Widen the data for weekly average columns" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'pd' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m df_pivot = (\n\u001b[0;32m----> 2\u001b[0;31m pd.pivot_table(\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"vaccinations_pct\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"area\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'pd' is not defined" ] } ], "source": [ "df_pivot = (\n", " pd.pivot_table(\n", " df,\n", " values=\"vaccinations_pct\",\n", " index=\"area\",\n", " columns=\"week\",\n", " aggfunc=\"mean\",\n", " )\n", " .round(2)\n", " .reset_index()\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'df_pivot' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_pivot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'df_pivot' is not defined" ] } ], "source": [ "df_pivot.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Make the columns readable" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "df_pivot.columns = [\"pct_week_\" + str(col) for col in df_pivot.columns]" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'df_pivot' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m df_pivot.rename(\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"pct_week_area\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"name\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"week_pop_2019\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"population\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m )\n", "\u001b[0;31mNameError\u001b[0m: name 'df_pivot' is not defined" ] } ], "source": [ "df_pivot.rename(\n", " columns={\"pct_week_area\": \"name\", \"week_pop_2019\": \"population\"}, inplace=True\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'df_pivot' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_pivot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'df_pivot' is not defined" ] } ], "source": [ "df_pivot.head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "df_pivot[\"name\"] = df_pivot[\"name\"].str.strip(\" \")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merge with population and demographic figures/rates from [vaccine disparity project](https://www.latimes.com/projects/la-covid-vaccine-racial-disparities-by-neighborhood-map/)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "disparity = pd.read_csv(\"../../vaccine-disparity/_workspace/neighborhoods_grouped.csv\")" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "disparity[\"name\"] = disparity[\"name\"].str.strip(\" \")" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "disparity_clean = disparity[\n", " [\n", " \"name\",\n", " \"total_pop\",\n", " \"totalpop_17andunder_percent\",\n", " \"totalpop_65andup_percent\",\n", " \"nonwhite_percentage\",\n", " \"majority_white\",\n", " \"majority_hispanic\",\n", " \"median_householdincome\",\n", " ]\n", "]" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "df_merge = pd.merge(df_pivot, disparity_clean, on=\"name\", indicator=True, how=\"left\")" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "df_merge[\"change_week9_to_15\"] = (\n", " ((df_merge[\"pct_week_15\"] - df_merge[\"pct_week_9\"]) / df_merge[\"pct_week_9\"]) * 100\n", ").round()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "df_merge_slim = df_merge[\n", " [\n", " \"name\",\n", " \"pct_week_9\",\n", " \"pct_week_15\",\n", " \"total_pop\",\n", " \"totalpop_17andunder_percent\",\n", " \"totalpop_65andup_percent\",\n", " \"nonwhite_percentage\",\n", " \"majority_white\",\n", " \"majority_hispanic\",\n", " \"median_householdincome\",\n", " \"change_week9_to_15\",\n", " ]\n", "].copy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Filter the dataframe to larger places" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "df_merge_slim[\"total_pop\"] = df_merge_slim[\"total_pop\"].fillna(\"\").astype(int)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "larger_places = df_merge_slim[df_merge_slim[\"total_pop\"] > 5000].copy()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Which places with populations of more than 5,000 saw the largest pct increase from week 8 to week 15?" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "largest = (\n", " larger_places[\n", " (larger_places[\"pct_week_9\"].notnull())\n", " & (larger_places[\"pct_week_15\"].notnull())\n", " ]\n", " .sort_values(\"change_week9_to_15\", ascending=False)\n", " .head(10)\n", ")" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namepct_week_15majority_hispanictotalpop_17andunder_percenttotalpop_65andup_percent
161Los Angeles - University Park25.15False0.1197000.061099
273Unincorporated - Littlerock/Pearblossom21.74True0.2841080.139551
155Los Angeles - Thai Town27.97False0.1073680.130282
254Unincorporated - Florence-Firestone25.36True0.3176360.068662
269Unincorporated - Lennox27.16True0.2858760.064121
20Cudahy25.56True0.3056160.069818
49Los Angeles - Alsace29.92True0.2096120.089146
90Los Angeles - Green Meadows23.61True0.3089070.074795
86Los Angeles - Florence-Firestone22.81True0.3194420.055256
169Los Angeles - Vernon Central22.51True0.3236920.062574
\n", "
" ], "text/plain": [ " name pct_week_15 majority_hispanic \\\n", "161 Los Angeles - University Park 25.15 False \n", "273 Unincorporated - Littlerock/Pearblossom 21.74 True \n", "155 Los Angeles - Thai Town 27.97 False \n", "254 Unincorporated - Florence-Firestone 25.36 True \n", "269 Unincorporated - Lennox 27.16 True \n", "20 Cudahy 25.56 True \n", "49 Los Angeles - Alsace 29.92 True \n", "90 Los Angeles - Green Meadows 23.61 True \n", "86 Los Angeles - Florence-Firestone 22.81 True \n", "169 Los Angeles - Vernon Central 22.51 True \n", "\n", " totalpop_17andunder_percent totalpop_65andup_percent \n", "161 0.119700 0.061099 \n", "273 0.284108 0.139551 \n", "155 0.107368 0.130282 \n", "254 0.317636 0.068662 \n", "269 0.285876 0.064121 \n", "20 0.305616 0.069818 \n", "49 0.209612 0.089146 \n", "90 0.308907 0.074795 \n", "86 0.319442 0.055256 \n", "169 0.323692 0.062574 " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "largest[\n", " [\n", " \"name\",\n", " \"pct_week_15\",\n", " \"majority_hispanic\",\n", " \"totalpop_17andunder_percent\",\n", " \"totalpop_65andup_percent\",\n", " ]\n", "]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Mean income and older population share in places with largest pct increases" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "42400" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "round(\n", " larger_places[\n", " (larger_places[\"pct_week_9\"].notnull())\n", " & (larger_places[\"pct_week_15\"].notnull())\n", " ]\n", " .sort_values(\"change_week9_to_15\", ascending=False)\n", " .head(10)[\"median_householdincome\"]\n", " .mean()\n", ")" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8.15" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "round(\n", " larger_places[\n", " (larger_places[\"pct_week_9\"].notnull())\n", " & (larger_places[\"pct_week_15\"].notnull())\n", " ]\n", " .sort_values(\"change_week9_to_15\", ascending=False)\n", " .head(10)[\"totalpop_65andup_percent\"]\n", " .mean()\n", " * 100,\n", " 2,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Mean income and older population share in places with smalles pct increases" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "141297" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "round(\n", " larger_places[\n", " (larger_places[\"pct_week_9\"].notnull())\n", " & (larger_places[\"pct_week_15\"].notnull())\n", " ]\n", " .sort_values(\"change_week9_to_15\", ascending=False)\n", " .tail(10)[\"median_householdincome\"]\n", " .mean()\n", ")" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "22.77" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "round(\n", " larger_places[\n", " (larger_places[\"pct_week_9\"].notnull())\n", " & (larger_places[\"pct_week_15\"].notnull())\n", " ]\n", " .sort_values(\"change_week9_to_15\", ascending=False)\n", " .tail(10)[\"totalpop_65andup_percent\"]\n", " .mean()\n", " * 100,\n", " 2,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Which places with populations of more than 5,000 saw the smallest pct increase from week 8 to week 14?" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "smallest = (\n", " larger_places[\n", " (larger_places[\"pct_week_9\"].notnull())\n", " & (larger_places[\"pct_week_15\"].notnull())\n", " ]\n", " .sort_values(\"change_week9_to_15\", ascending=False)\n", " .tail(10)\n", ")" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namepct_week_15majority_hispanictotalpop_17andunder_percenttotalpop_65andup_percent
200Rancho Palos Verdes50.23False0.2137750.248590
180Los Angeles - Westwood35.76False0.0889210.119817
203Rolling Hills Estates55.39False0.2224680.247619
195Palos Verdes Estates51.26False0.2134550.270924
81Los Angeles - Encino54.11False0.2071760.199472
64Los Angeles - Century City59.68False0.1773790.234012
67Los Angeles - Cheviot Hills63.66False0.2112830.202283
55Los Angeles - Beverly Crest54.03False0.1960940.237505
10Beverly Hills51.69False0.2025200.213604
54Los Angeles - Bel Air52.68False0.2099390.303493
\n", "
" ], "text/plain": [ " name pct_week_15 majority_hispanic \\\n", "200 Rancho Palos Verdes 50.23 False \n", "180 Los Angeles - Westwood 35.76 False \n", "203 Rolling Hills Estates 55.39 False \n", "195 Palos Verdes Estates 51.26 False \n", "81 Los Angeles - Encino 54.11 False \n", "64 Los Angeles - Century City 59.68 False \n", "67 Los Angeles - Cheviot Hills 63.66 False \n", "55 Los Angeles - Beverly Crest 54.03 False \n", "10 Beverly Hills 51.69 False \n", "54 Los Angeles - Bel Air 52.68 False \n", "\n", " totalpop_17andunder_percent totalpop_65andup_percent \n", "200 0.213775 0.248590 \n", "180 0.088921 0.119817 \n", "203 0.222468 0.247619 \n", "195 0.213455 0.270924 \n", "81 0.207176 0.199472 \n", "64 0.177379 0.234012 \n", "67 0.211283 0.202283 \n", "55 0.196094 0.237505 \n", "10 0.202520 0.213604 \n", "54 0.209939 0.303493 " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "smallest[\n", " [\n", " \"name\",\n", " \"pct_week_15\",\n", " \"majority_hispanic\",\n", " \"totalpop_17andunder_percent\",\n", " \"totalpop_65andup_percent\",\n", " ]\n", "]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Which places have the highest vaccination coverage right now?" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namepct_week_15majority_hispanictotalpop_17andunder_percenttotalpop_65andup_percent
67Los Angeles - Cheviot Hills63.66False0.2112830.202283
136Los Angeles - Rancho Park62.71False0.2078140.163806
64Los Angeles - Century City59.68False0.1773790.234012
126Los Angeles - Pacific Palisades56.17False0.2486220.236204
134Los Angeles - Playa Vista55.74False0.1866680.104989
203Rolling Hills Estates55.39False0.2224680.247619
81Los Angeles - Encino54.11False0.2071760.199472
55Los Angeles - Beverly Crest54.03False0.1960940.237505
212Sierra Madre53.69False0.1822630.228925
208San Marino53.29False0.2276490.206610
\n", "
" ], "text/plain": [ " name pct_week_15 majority_hispanic \\\n", "67 Los Angeles - Cheviot Hills 63.66 False \n", "136 Los Angeles - Rancho Park 62.71 False \n", "64 Los Angeles - Century City 59.68 False \n", "126 Los Angeles - Pacific Palisades 56.17 False \n", "134 Los Angeles - Playa Vista 55.74 False \n", "203 Rolling Hills Estates 55.39 False \n", "81 Los Angeles - Encino 54.11 False \n", "55 Los Angeles - Beverly Crest 54.03 False \n", "212 Sierra Madre 53.69 False \n", "208 San Marino 53.29 False \n", "\n", " totalpop_17andunder_percent totalpop_65andup_percent \n", "67 0.211283 0.202283 \n", "136 0.207814 0.163806 \n", "64 0.177379 0.234012 \n", "126 0.248622 0.236204 \n", "134 0.186668 0.104989 \n", "203 0.222468 0.247619 \n", "81 0.207176 0.199472 \n", "55 0.196094 0.237505 \n", "212 0.182263 0.228925 \n", "208 0.227649 0.206610 " ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "larger_places[\n", " (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n", "][\n", " [\n", " \"name\",\n", " \"pct_week_15\",\n", " \"majority_hispanic\",\n", " \"totalpop_17andunder_percent\",\n", " \"totalpop_65andup_percent\",\n", " ]\n", "].sort_values(\n", " \"pct_week_15\", ascending=False\n", ").head(\n", " 10\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Which places have the lowest vaccination coverage right now? " ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namepct_week_15majority_hispanictotalpop_17andunder_percenttotalpop_65andup_percent
179Los Angeles - Westlake22.88True0.2301500.093303
86Los Angeles - Florence-Firestone22.81True0.3194420.055256
169Los Angeles - Vernon Central22.51True0.3236920.062574
146Los Angeles - South Park22.28True0.3224960.056965
63Los Angeles - Central22.14True0.3061730.065168
65Los Angeles - Century Palms/Cove21.88True0.3111020.073659
273Unincorporated - Littlerock/Pearblossom21.74True0.2841080.139551
172Los Angeles - Watts21.42True0.3583580.055530
168Los Angeles - Vermont Vista20.70True0.3193450.067000
266Unincorporated - Lake Los Angeles19.11True0.3028450.114554
\n", "
" ], "text/plain": [ " name pct_week_15 majority_hispanic \\\n", "179 Los Angeles - Westlake 22.88 True \n", "86 Los Angeles - Florence-Firestone 22.81 True \n", "169 Los Angeles - Vernon Central 22.51 True \n", "146 Los Angeles - South Park 22.28 True \n", "63 Los Angeles - Central 22.14 True \n", "65 Los Angeles - Century Palms/Cove 21.88 True \n", "273 Unincorporated - Littlerock/Pearblossom 21.74 True \n", "172 Los Angeles - Watts 21.42 True \n", "168 Los Angeles - Vermont Vista 20.70 True \n", "266 Unincorporated - Lake Los Angeles 19.11 True \n", "\n", " totalpop_17andunder_percent totalpop_65andup_percent \n", "179 0.230150 0.093303 \n", "86 0.319442 0.055256 \n", "169 0.323692 0.062574 \n", "146 0.322496 0.056965 \n", "63 0.306173 0.065168 \n", "65 0.311102 0.073659 \n", "273 0.284108 0.139551 \n", "172 0.358358 0.055530 \n", "168 0.319345 0.067000 \n", "266 0.302845 0.114554 " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "larger_places[\n", " (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n", "][\n", " [\n", " \"name\",\n", " \"pct_week_15\",\n", " \"majority_hispanic\",\n", " \"totalpop_17andunder_percent\",\n", " \"totalpop_65andup_percent\",\n", " ]\n", "].sort_values(\n", " \"pct_week_15\", ascending=False\n", ").tail(\n", " 10\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merge the places with the largest and smallest increases in vax coverage for graphics" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "largest[\"category\"] = \"Largest increase\"\n", "smallest[\"category\"] = \"Smallest increase\"" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "large_small = pd.concat([largest, smallest]).reset_index()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "large_small[\"name_clean\"] = (\n", " large_small[\"name\"]\n", " .str.replace(\"City of\", \"\", regex=False)\n", " .str.replace(\"Los Angeles - \", \"\", regex=False)\n", " .str.replace(\"Unincorporated - \", \"\", regex=False)\n", ")" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "large_small_slim = large_small[\n", " [\n", " \"category\",\n", " \"name_clean\",\n", " \"pct_week_9\",\n", " \"pct_week_15\",\n", " \"change_week9_to_15\",\n", " ]\n", "].copy()" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "large_small_slim.rename(\n", " columns={\n", " \"change_week9_to_15\": \"% change\",\n", " \"pct_week_15\": \"Week 15\",\n", " \"pct_week_9\": \"Week 9\",\n", " \"name_clean\": \"Place\",\n", " \"category\": \"Category\",\n", " },\n", " inplace=True,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Merge with geography" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "places_geo = gpd.read_file(\n", " \"input/cities-neighborhoods-unincorporated-la-county-no-islands.geojson\"\n", ")" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "places_geo = places_geo.to_crs(\"EPSG:4326\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Clean up places names" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "larger_places[\"name_clean\"] = (\n", " larger_places[\"name\"]\n", " .str.strip(\" \")\n", " .str.replace(\"City of\", \"\", regex=False)\n", " .str.replace(\"Los Angeles - \", \"\", regex=False)\n", " .str.replace(\"Unincorporated - \", \"\", regex=False)\n", ")" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "df_geo = places_geo.merge(larger_places, left_on=\"NAME\", right_on=\"name_clean\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Slim down the geo file and simplify its features for graphics" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "df_geo_slim = df_geo[\n", " [\"name_clean\", \"pct_week_9\", \"pct_week_15\", \"change_week9_to_15\", \"geometry\"]\n", "]" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "df_geo_slim[\n", " [\"name_clean\", \"pct_week_9\", \"pct_week_15\", \"change_week9_to_15\", \"geometry\"]\n", "].to_file(\"output/vaccine-community/df_geo.geojson\", driver=\"GeoJSON\")" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "df_geo_slim[[\"name_clean\", \"change_week9_to_15\"]].to_csv(\n", " \"output/vaccine-community/df_geo_metadata_datawrapper.csv\", index=False\n", ")" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[o] Wrote output/vaccine-community/df_geo_simple.geojson\n" ] } ], "source": [ "!mapshaper -i output/vaccine-community/df_geo.geojson -simplify percentage=.3 no-repair -o output/vaccine-community/df_geo_simple.geojson \\" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Exports" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "import datetime as dt\n", "\n", "today = dt.datetime.today().strftime(\"%m-%d-%Y\")" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [], "source": [ "concatenated_df.to_csv(\n", " \"output/vaccine-community/lac_vax_by_place_raw_table_\" + today + \".csv\",\n", " index=False,\n", ")" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "df_pivot.to_csv(\n", " \"output/vaccine-community/lac_vax_by_place_weeks_\" + today + \".csv\",\n", " index=False,\n", ")" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "larger_places[\n", " (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n", "].sort_values(\"change_week9_to_15\", ascending=False).head(10).to_csv(\n", " \"output/vaccine-community/largest_vax_coverage_change_\" + today + \".csv\",\n", " index=False,\n", ")" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "larger_places[\n", " (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n", "].sort_values(\"change_week9_to_15\", ascending=False).tail(10).to_csv(\n", " \"output/vaccine-community/smallest_vax_coverage_change_\" + today + \".csv\",\n", " index=False,\n", ")" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [], "source": [ "larger_places[\n", " (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n", "][[\"name\", \"pct_week_15\", \"total_pop\"]].sort_values(\n", " \"pct_week_15\", ascending=False\n", ").tail(\n", " 10\n", ").to_csv(\n", " \"output/vaccine-community/lowest_coverage_\" + today + \".csv\",\n", " index=False,\n", ")" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "larger_places[\n", " (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n", "][[\"name\", \"pct_week_15\", \"total_pop\"]].sort_values(\n", " \"pct_week_15\", ascending=False\n", ").head(\n", " 10\n", ").to_csv(\n", " \"output/vaccine-community/highest_coverage_\" + today + \".csv\",\n", " index=False,\n", ")" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "large_small_slim.to_csv(\"output/vaccine-community/large_small_slim.csv\", index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }