{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LA County coronavirus vaccine administration by city/community"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A data analysis for a story in the L.A. Times, by Matt Stiles & Luke Money.\n",
    "\n",
    "Questions? [matt.stiles@latimes.com](mailto:matt.stiles@latimes.com)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load Python tools"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext lab_black"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import geopandas as gpd\n",
    "import glob\n",
    "import path\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.display.max_columns = 50\n",
    "pd.options.display.max_rows = 1000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create a timeseries from the daily scrapes for the coronavirus tracker data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "path = \"/Users/mhustiles/data/github/coronavirus-tracker/_notebooks/data/raw/vaccines/los-angeles/\"\n",
    "all_files = glob.glob(os.path.join(path, \"*.csv\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Read all the daily csv files, concatenate them and create a single new dataframe"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_from_each_file = (\n",
    "    pd.read_csv(f, encoding=\"ISO-8859-1\", low_memory=False) for f in all_files\n",
    ")\n",
    "concatenated_df = pd.concat(df_from_each_file, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = concatenated_df.copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Strip out the strings placed in columns by the county health officials for small places"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df[\n",
    "    (df.vaccinations.str.strip() != \"Unreliable Data\")\n",
    "    & (df.pop_2019.str.strip() != \"No Pop Data\")\n",
    "    & (df.vaccinations_pct.str.strip() != \"Unreliable Data\")\n",
    "    & (df.vaccinations_pct.str.strip() != \"No Pop Data\")\n",
    "    & (df.vaccinations.str.strip() != \"<5\")\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Merging on names is FUN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"area\"] = df[\"area\"].str.strip(\" \").str.replace(\"City of \", \"\", regex=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Add the week of the year from the update date column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"week\"] = pd.to_datetime(df[\"update_date\"]).dt.week"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>area</th>\n",
       "      <th>vaccinations</th>\n",
       "      <th>pop_2019</th>\n",
       "      <th>vaccinations_pct</th>\n",
       "      <th>update_date</th>\n",
       "      <th>week</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4259</th>\n",
       "      <td>West Covina</td>\n",
       "      <td>38583</td>\n",
       "      <td>88873</td>\n",
       "      <td>43.4</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4406</th>\n",
       "      <td>Unincorporated - Angeles National Forest</td>\n",
       "      <td>296</td>\n",
       "      <td>1096</td>\n",
       "      <td>27.0</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4404</th>\n",
       "      <td>Unincorporated - Altadena</td>\n",
       "      <td>17205</td>\n",
       "      <td>36812</td>\n",
       "      <td>46.7</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4403</th>\n",
       "      <td>Unincorporated - Agua Dulce</td>\n",
       "      <td>958</td>\n",
       "      <td>3736</td>\n",
       "      <td>25.6</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4402</th>\n",
       "      <td>Unincorporated - Acton</td>\n",
       "      <td>1652</td>\n",
       "      <td>6961</td>\n",
       "      <td>23.7</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4401</th>\n",
       "      <td>Los Angeles - Woodland Hills</td>\n",
       "      <td>27387</td>\n",
       "      <td>57284</td>\n",
       "      <td>47.8</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4400</th>\n",
       "      <td>Los Angeles - Winnetka</td>\n",
       "      <td>15821</td>\n",
       "      <td>41471</td>\n",
       "      <td>38.1</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4399</th>\n",
       "      <td>Los Angeles - Wilshire Center</td>\n",
       "      <td>13370</td>\n",
       "      <td>40599</td>\n",
       "      <td>32.9</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4398</th>\n",
       "      <td>Los Angeles - Wilmington</td>\n",
       "      <td>11236</td>\n",
       "      <td>42449</td>\n",
       "      <td>26.5</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4397</th>\n",
       "      <td>Los Angeles - Wholesale District</td>\n",
       "      <td>12332</td>\n",
       "      <td>32528</td>\n",
       "      <td>37.9</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                          area vaccinations pop_2019  \\\n",
       "4259                               West Covina        38583    88873   \n",
       "4406  Unincorporated - Angeles National Forest          296     1096   \n",
       "4404                 Unincorporated - Altadena        17205    36812   \n",
       "4403               Unincorporated - Agua Dulce          958     3736   \n",
       "4402                    Unincorporated - Acton         1652     6961   \n",
       "4401              Los Angeles - Woodland Hills        27387    57284   \n",
       "4400                    Los Angeles - Winnetka        15821    41471   \n",
       "4399             Los Angeles - Wilshire Center        13370    40599   \n",
       "4398                  Los Angeles - Wilmington        11236    42449   \n",
       "4397          Los Angeles - Wholesale District        12332    32528   \n",
       "\n",
       "     vaccinations_pct update_date  week  \n",
       "4259             43.4  2021-04-12    15  \n",
       "4406             27.0  2021-04-12    15  \n",
       "4404             46.7  2021-04-12    15  \n",
       "4403             25.6  2021-04-12    15  \n",
       "4402             23.7  2021-04-12    15  \n",
       "4401             47.8  2021-04-12    15  \n",
       "4400             38.1  2021-04-12    15  \n",
       "4399             32.9  2021-04-12    15  \n",
       "4398             26.5  2021-04-12    15  \n",
       "4397             37.9  2021-04-12    15  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values(\"update_date\", ascending=False).head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"area\"] = df[\"area\"].str.replace(\"City of\", \"\", regex=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Clean up data types"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"vaccinations\"] = df[\"vaccinations\"].astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"pop_2019\"] = df[\"pop_2019\"].astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"vaccinations_pct\"] = ((df[\"vaccinations\"] / df[\"pop_2019\"]) * 100).round(2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Which week of the year? "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"week\"] = pd.to_datetime(df[\"update_date\"]).dt.week"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "df[\"weekday\"] = pd.to_datetime(df[\"update_date\"]).dt.day_name()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Just get the updates each Monday"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df[df[\"weekday\"] == \"Monday\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>area</th>\n",
       "      <th>vaccinations</th>\n",
       "      <th>pop_2019</th>\n",
       "      <th>vaccinations_pct</th>\n",
       "      <th>update_date</th>\n",
       "      <th>week</th>\n",
       "      <th>weekday</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>14637</th>\n",
       "      <td>Culver City</td>\n",
       "      <td>7445.0</td>\n",
       "      <td>32840.0</td>\n",
       "      <td>22.67</td>\n",
       "      <td>2021-03-01</td>\n",
       "      <td>9</td>\n",
       "      <td>Monday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6981</th>\n",
       "      <td>Culver City</td>\n",
       "      <td>7445.0</td>\n",
       "      <td>32840.0</td>\n",
       "      <td>22.67</td>\n",
       "      <td>2021-03-08</td>\n",
       "      <td>10</td>\n",
       "      <td>Monday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15333</th>\n",
       "      <td>Culver City</td>\n",
       "      <td>9969.0</td>\n",
       "      <td>32840.0</td>\n",
       "      <td>30.36</td>\n",
       "      <td>2021-03-15</td>\n",
       "      <td>11</td>\n",
       "      <td>Monday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5937</th>\n",
       "      <td>Culver City</td>\n",
       "      <td>9969.0</td>\n",
       "      <td>32840.0</td>\n",
       "      <td>30.36</td>\n",
       "      <td>2021-03-22</td>\n",
       "      <td>12</td>\n",
       "      <td>Monday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14289</th>\n",
       "      <td>Culver City</td>\n",
       "      <td>13819.0</td>\n",
       "      <td>32840.0</td>\n",
       "      <td>42.08</td>\n",
       "      <td>2021-03-29</td>\n",
       "      <td>13</td>\n",
       "      <td>Monday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1761</th>\n",
       "      <td>Culver City</td>\n",
       "      <td>13819.0</td>\n",
       "      <td>32840.0</td>\n",
       "      <td>42.08</td>\n",
       "      <td>2021-04-05</td>\n",
       "      <td>14</td>\n",
       "      <td>Monday</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4197</th>\n",
       "      <td>Culver City</td>\n",
       "      <td>17718.0</td>\n",
       "      <td>33638.0</td>\n",
       "      <td>52.67</td>\n",
       "      <td>2021-04-12</td>\n",
       "      <td>15</td>\n",
       "      <td>Monday</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "              area  vaccinations  pop_2019  vaccinations_pct update_date  \\\n",
       "14637  Culver City        7445.0   32840.0             22.67  2021-03-01   \n",
       "6981   Culver City        7445.0   32840.0             22.67  2021-03-08   \n",
       "15333  Culver City        9969.0   32840.0             30.36  2021-03-15   \n",
       "5937   Culver City        9969.0   32840.0             30.36  2021-03-22   \n",
       "14289  Culver City       13819.0   32840.0             42.08  2021-03-29   \n",
       "1761   Culver City       13819.0   32840.0             42.08  2021-04-05   \n",
       "4197   Culver City       17718.0   33638.0             52.67  2021-04-12   \n",
       "\n",
       "       week weekday  \n",
       "14637     9  Monday  \n",
       "6981     10  Monday  \n",
       "15333    11  Monday  \n",
       "5937     12  Monday  \n",
       "14289    13  Monday  \n",
       "1761     14  Monday  \n",
       "4197     15  Monday  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df[\"area\"].str.contains(\"Culver City\")].sort_values(\n",
    "    \"update_date\", ascending=True\n",
    ").head(100)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Widen the data for weekly average columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'pd' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-3-c8942e05d481>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m df_pivot = (\n\u001b[0;32m----> 2\u001b[0;31m     pd.pivot_table(\n\u001b[0m\u001b[1;32m      3\u001b[0m         \u001b[0mdf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m         \u001b[0mvalues\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"vaccinations_pct\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m         \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"area\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'pd' is not defined"
     ]
    }
   ],
   "source": [
    "df_pivot = (\n",
    "    pd.pivot_table(\n",
    "        df,\n",
    "        values=\"vaccinations_pct\",\n",
    "        index=\"area\",\n",
    "        columns=\"week\",\n",
    "        aggfunc=\"mean\",\n",
    "    )\n",
    "    .round(2)\n",
    "    .reset_index()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'df_pivot' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-4-a99db6e1a1af>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_pivot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m: name 'df_pivot' is not defined"
     ]
    }
   ],
   "source": [
    "df_pivot.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Make the columns readable"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_pivot.columns = [\"pct_week_\" + str(col) for col in df_pivot.columns]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'df_pivot' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-1-7cc4466fa477>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m df_pivot.rename(\n\u001b[0m\u001b[1;32m      2\u001b[0m     \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m\"pct_week_area\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"name\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"week_pop_2019\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"population\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m )\n",
      "\u001b[0;31mNameError\u001b[0m: name 'df_pivot' is not defined"
     ]
    }
   ],
   "source": [
    "df_pivot.rename(\n",
    "    columns={\"pct_week_area\": \"name\", \"week_pop_2019\": \"population\"}, inplace=True\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'df_pivot' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-2-a99db6e1a1af>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_pivot\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m: name 'df_pivot' is not defined"
     ]
    }
   ],
   "source": [
    "df_pivot.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_pivot[\"name\"] = df_pivot[\"name\"].str.strip(\" \")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Merge with population and demographic figures/rates from [vaccine disparity project](https://www.latimes.com/projects/la-covid-vaccine-racial-disparities-by-neighborhood-map/)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "disparity = pd.read_csv(\"../../vaccine-disparity/_workspace/neighborhoods_grouped.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "disparity[\"name\"] = disparity[\"name\"].str.strip(\" \")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "disparity_clean = disparity[\n",
    "    [\n",
    "        \"name\",\n",
    "        \"total_pop\",\n",
    "        \"totalpop_17andunder_percent\",\n",
    "        \"totalpop_65andup_percent\",\n",
    "        \"nonwhite_percentage\",\n",
    "        \"majority_white\",\n",
    "        \"majority_hispanic\",\n",
    "        \"median_householdincome\",\n",
    "    ]\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merge = pd.merge(df_pivot, disparity_clean, on=\"name\", indicator=True, how=\"left\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merge[\"change_week9_to_15\"] = (\n",
    "    ((df_merge[\"pct_week_15\"] - df_merge[\"pct_week_9\"]) / df_merge[\"pct_week_9\"]) * 100\n",
    ").round()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merge_slim = df_merge[\n",
    "    [\n",
    "        \"name\",\n",
    "        \"pct_week_9\",\n",
    "        \"pct_week_15\",\n",
    "        \"total_pop\",\n",
    "        \"totalpop_17andunder_percent\",\n",
    "        \"totalpop_65andup_percent\",\n",
    "        \"nonwhite_percentage\",\n",
    "        \"majority_white\",\n",
    "        \"majority_hispanic\",\n",
    "        \"median_householdincome\",\n",
    "        \"change_week9_to_15\",\n",
    "    ]\n",
    "].copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Filter the dataframe to larger places"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merge_slim[\"total_pop\"] = df_merge_slim[\"total_pop\"].fillna(\"\").astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "larger_places = df_merge_slim[df_merge_slim[\"total_pop\"] > 5000].copy()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Which places with populations of more than 5,000 saw the largest pct increase from week 8 to week 15?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "largest = (\n",
    "    larger_places[\n",
    "        (larger_places[\"pct_week_9\"].notnull())\n",
    "        & (larger_places[\"pct_week_15\"].notnull())\n",
    "    ]\n",
    "    .sort_values(\"change_week9_to_15\", ascending=False)\n",
    "    .head(10)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>pct_week_15</th>\n",
       "      <th>majority_hispanic</th>\n",
       "      <th>totalpop_17andunder_percent</th>\n",
       "      <th>totalpop_65andup_percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>161</th>\n",
       "      <td>Los Angeles - University Park</td>\n",
       "      <td>25.15</td>\n",
       "      <td>False</td>\n",
       "      <td>0.119700</td>\n",
       "      <td>0.061099</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>273</th>\n",
       "      <td>Unincorporated - Littlerock/Pearblossom</td>\n",
       "      <td>21.74</td>\n",
       "      <td>True</td>\n",
       "      <td>0.284108</td>\n",
       "      <td>0.139551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>155</th>\n",
       "      <td>Los Angeles - Thai Town</td>\n",
       "      <td>27.97</td>\n",
       "      <td>False</td>\n",
       "      <td>0.107368</td>\n",
       "      <td>0.130282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>254</th>\n",
       "      <td>Unincorporated - Florence-Firestone</td>\n",
       "      <td>25.36</td>\n",
       "      <td>True</td>\n",
       "      <td>0.317636</td>\n",
       "      <td>0.068662</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>269</th>\n",
       "      <td>Unincorporated - Lennox</td>\n",
       "      <td>27.16</td>\n",
       "      <td>True</td>\n",
       "      <td>0.285876</td>\n",
       "      <td>0.064121</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Cudahy</td>\n",
       "      <td>25.56</td>\n",
       "      <td>True</td>\n",
       "      <td>0.305616</td>\n",
       "      <td>0.069818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>Los Angeles - Alsace</td>\n",
       "      <td>29.92</td>\n",
       "      <td>True</td>\n",
       "      <td>0.209612</td>\n",
       "      <td>0.089146</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>90</th>\n",
       "      <td>Los Angeles - Green Meadows</td>\n",
       "      <td>23.61</td>\n",
       "      <td>True</td>\n",
       "      <td>0.308907</td>\n",
       "      <td>0.074795</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>Los Angeles - Florence-Firestone</td>\n",
       "      <td>22.81</td>\n",
       "      <td>True</td>\n",
       "      <td>0.319442</td>\n",
       "      <td>0.055256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>169</th>\n",
       "      <td>Los Angeles - Vernon Central</td>\n",
       "      <td>22.51</td>\n",
       "      <td>True</td>\n",
       "      <td>0.323692</td>\n",
       "      <td>0.062574</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                        name  pct_week_15  majority_hispanic  \\\n",
       "161            Los Angeles - University Park        25.15              False   \n",
       "273  Unincorporated - Littlerock/Pearblossom        21.74               True   \n",
       "155                  Los Angeles - Thai Town        27.97              False   \n",
       "254      Unincorporated - Florence-Firestone        25.36               True   \n",
       "269                  Unincorporated - Lennox        27.16               True   \n",
       "20                                    Cudahy        25.56               True   \n",
       "49                      Los Angeles - Alsace        29.92               True   \n",
       "90               Los Angeles - Green Meadows        23.61               True   \n",
       "86          Los Angeles - Florence-Firestone        22.81               True   \n",
       "169             Los Angeles - Vernon Central        22.51               True   \n",
       "\n",
       "     totalpop_17andunder_percent  totalpop_65andup_percent  \n",
       "161                     0.119700                  0.061099  \n",
       "273                     0.284108                  0.139551  \n",
       "155                     0.107368                  0.130282  \n",
       "254                     0.317636                  0.068662  \n",
       "269                     0.285876                  0.064121  \n",
       "20                      0.305616                  0.069818  \n",
       "49                      0.209612                  0.089146  \n",
       "90                      0.308907                  0.074795  \n",
       "86                      0.319442                  0.055256  \n",
       "169                     0.323692                  0.062574  "
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "largest[\n",
    "    [\n",
    "        \"name\",\n",
    "        \"pct_week_15\",\n",
    "        \"majority_hispanic\",\n",
    "        \"totalpop_17andunder_percent\",\n",
    "        \"totalpop_65andup_percent\",\n",
    "    ]\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mean income and older population share in places with largest pct increases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "42400"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "round(\n",
    "    larger_places[\n",
    "        (larger_places[\"pct_week_9\"].notnull())\n",
    "        & (larger_places[\"pct_week_15\"].notnull())\n",
    "    ]\n",
    "    .sort_values(\"change_week9_to_15\", ascending=False)\n",
    "    .head(10)[\"median_householdincome\"]\n",
    "    .mean()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8.15"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "round(\n",
    "    larger_places[\n",
    "        (larger_places[\"pct_week_9\"].notnull())\n",
    "        & (larger_places[\"pct_week_15\"].notnull())\n",
    "    ]\n",
    "    .sort_values(\"change_week9_to_15\", ascending=False)\n",
    "    .head(10)[\"totalpop_65andup_percent\"]\n",
    "    .mean()\n",
    "    * 100,\n",
    "    2,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mean income and older population share in places with smalles pct increases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "141297"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "round(\n",
    "    larger_places[\n",
    "        (larger_places[\"pct_week_9\"].notnull())\n",
    "        & (larger_places[\"pct_week_15\"].notnull())\n",
    "    ]\n",
    "    .sort_values(\"change_week9_to_15\", ascending=False)\n",
    "    .tail(10)[\"median_householdincome\"]\n",
    "    .mean()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "22.77"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "round(\n",
    "    larger_places[\n",
    "        (larger_places[\"pct_week_9\"].notnull())\n",
    "        & (larger_places[\"pct_week_15\"].notnull())\n",
    "    ]\n",
    "    .sort_values(\"change_week9_to_15\", ascending=False)\n",
    "    .tail(10)[\"totalpop_65andup_percent\"]\n",
    "    .mean()\n",
    "    * 100,\n",
    "    2,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Which places with populations of more than 5,000 saw the smallest pct increase from week 8 to week 14?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "smallest = (\n",
    "    larger_places[\n",
    "        (larger_places[\"pct_week_9\"].notnull())\n",
    "        & (larger_places[\"pct_week_15\"].notnull())\n",
    "    ]\n",
    "    .sort_values(\"change_week9_to_15\", ascending=False)\n",
    "    .tail(10)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>pct_week_15</th>\n",
       "      <th>majority_hispanic</th>\n",
       "      <th>totalpop_17andunder_percent</th>\n",
       "      <th>totalpop_65andup_percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>200</th>\n",
       "      <td>Rancho Palos Verdes</td>\n",
       "      <td>50.23</td>\n",
       "      <td>False</td>\n",
       "      <td>0.213775</td>\n",
       "      <td>0.248590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>180</th>\n",
       "      <td>Los Angeles - Westwood</td>\n",
       "      <td>35.76</td>\n",
       "      <td>False</td>\n",
       "      <td>0.088921</td>\n",
       "      <td>0.119817</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>203</th>\n",
       "      <td>Rolling Hills Estates</td>\n",
       "      <td>55.39</td>\n",
       "      <td>False</td>\n",
       "      <td>0.222468</td>\n",
       "      <td>0.247619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>195</th>\n",
       "      <td>Palos Verdes Estates</td>\n",
       "      <td>51.26</td>\n",
       "      <td>False</td>\n",
       "      <td>0.213455</td>\n",
       "      <td>0.270924</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>81</th>\n",
       "      <td>Los Angeles - Encino</td>\n",
       "      <td>54.11</td>\n",
       "      <td>False</td>\n",
       "      <td>0.207176</td>\n",
       "      <td>0.199472</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>Los Angeles - Century City</td>\n",
       "      <td>59.68</td>\n",
       "      <td>False</td>\n",
       "      <td>0.177379</td>\n",
       "      <td>0.234012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>Los Angeles - Cheviot Hills</td>\n",
       "      <td>63.66</td>\n",
       "      <td>False</td>\n",
       "      <td>0.211283</td>\n",
       "      <td>0.202283</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>Los Angeles - Beverly Crest</td>\n",
       "      <td>54.03</td>\n",
       "      <td>False</td>\n",
       "      <td>0.196094</td>\n",
       "      <td>0.237505</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Beverly Hills</td>\n",
       "      <td>51.69</td>\n",
       "      <td>False</td>\n",
       "      <td>0.202520</td>\n",
       "      <td>0.213604</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>Los Angeles - Bel Air</td>\n",
       "      <td>52.68</td>\n",
       "      <td>False</td>\n",
       "      <td>0.209939</td>\n",
       "      <td>0.303493</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            name  pct_week_15  majority_hispanic  \\\n",
       "200          Rancho Palos Verdes        50.23              False   \n",
       "180       Los Angeles - Westwood        35.76              False   \n",
       "203        Rolling Hills Estates        55.39              False   \n",
       "195         Palos Verdes Estates        51.26              False   \n",
       "81          Los Angeles - Encino        54.11              False   \n",
       "64    Los Angeles - Century City        59.68              False   \n",
       "67   Los Angeles - Cheviot Hills        63.66              False   \n",
       "55   Los Angeles - Beverly Crest        54.03              False   \n",
       "10                 Beverly Hills        51.69              False   \n",
       "54         Los Angeles - Bel Air        52.68              False   \n",
       "\n",
       "     totalpop_17andunder_percent  totalpop_65andup_percent  \n",
       "200                     0.213775                  0.248590  \n",
       "180                     0.088921                  0.119817  \n",
       "203                     0.222468                  0.247619  \n",
       "195                     0.213455                  0.270924  \n",
       "81                      0.207176                  0.199472  \n",
       "64                      0.177379                  0.234012  \n",
       "67                      0.211283                  0.202283  \n",
       "55                      0.196094                  0.237505  \n",
       "10                      0.202520                  0.213604  \n",
       "54                      0.209939                  0.303493  "
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "smallest[\n",
    "    [\n",
    "        \"name\",\n",
    "        \"pct_week_15\",\n",
    "        \"majority_hispanic\",\n",
    "        \"totalpop_17andunder_percent\",\n",
    "        \"totalpop_65andup_percent\",\n",
    "    ]\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Which places have the highest vaccination coverage right now?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>pct_week_15</th>\n",
       "      <th>majority_hispanic</th>\n",
       "      <th>totalpop_17andunder_percent</th>\n",
       "      <th>totalpop_65andup_percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>Los Angeles - Cheviot Hills</td>\n",
       "      <td>63.66</td>\n",
       "      <td>False</td>\n",
       "      <td>0.211283</td>\n",
       "      <td>0.202283</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>Los Angeles - Rancho Park</td>\n",
       "      <td>62.71</td>\n",
       "      <td>False</td>\n",
       "      <td>0.207814</td>\n",
       "      <td>0.163806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>Los Angeles - Century City</td>\n",
       "      <td>59.68</td>\n",
       "      <td>False</td>\n",
       "      <td>0.177379</td>\n",
       "      <td>0.234012</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>126</th>\n",
       "      <td>Los Angeles - Pacific Palisades</td>\n",
       "      <td>56.17</td>\n",
       "      <td>False</td>\n",
       "      <td>0.248622</td>\n",
       "      <td>0.236204</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>Los Angeles - Playa Vista</td>\n",
       "      <td>55.74</td>\n",
       "      <td>False</td>\n",
       "      <td>0.186668</td>\n",
       "      <td>0.104989</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>203</th>\n",
       "      <td>Rolling Hills Estates</td>\n",
       "      <td>55.39</td>\n",
       "      <td>False</td>\n",
       "      <td>0.222468</td>\n",
       "      <td>0.247619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>81</th>\n",
       "      <td>Los Angeles - Encino</td>\n",
       "      <td>54.11</td>\n",
       "      <td>False</td>\n",
       "      <td>0.207176</td>\n",
       "      <td>0.199472</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>Los Angeles - Beverly Crest</td>\n",
       "      <td>54.03</td>\n",
       "      <td>False</td>\n",
       "      <td>0.196094</td>\n",
       "      <td>0.237505</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>212</th>\n",
       "      <td>Sierra Madre</td>\n",
       "      <td>53.69</td>\n",
       "      <td>False</td>\n",
       "      <td>0.182263</td>\n",
       "      <td>0.228925</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>208</th>\n",
       "      <td>San Marino</td>\n",
       "      <td>53.29</td>\n",
       "      <td>False</td>\n",
       "      <td>0.227649</td>\n",
       "      <td>0.206610</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                name  pct_week_15  majority_hispanic  \\\n",
       "67       Los Angeles - Cheviot Hills        63.66              False   \n",
       "136        Los Angeles - Rancho Park        62.71              False   \n",
       "64        Los Angeles - Century City        59.68              False   \n",
       "126  Los Angeles - Pacific Palisades        56.17              False   \n",
       "134        Los Angeles - Playa Vista        55.74              False   \n",
       "203            Rolling Hills Estates        55.39              False   \n",
       "81              Los Angeles - Encino        54.11              False   \n",
       "55       Los Angeles - Beverly Crest        54.03              False   \n",
       "212                     Sierra Madre        53.69              False   \n",
       "208                       San Marino        53.29              False   \n",
       "\n",
       "     totalpop_17andunder_percent  totalpop_65andup_percent  \n",
       "67                      0.211283                  0.202283  \n",
       "136                     0.207814                  0.163806  \n",
       "64                      0.177379                  0.234012  \n",
       "126                     0.248622                  0.236204  \n",
       "134                     0.186668                  0.104989  \n",
       "203                     0.222468                  0.247619  \n",
       "81                      0.207176                  0.199472  \n",
       "55                      0.196094                  0.237505  \n",
       "212                     0.182263                  0.228925  \n",
       "208                     0.227649                  0.206610  "
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "larger_places[\n",
    "    (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n",
    "][\n",
    "    [\n",
    "        \"name\",\n",
    "        \"pct_week_15\",\n",
    "        \"majority_hispanic\",\n",
    "        \"totalpop_17andunder_percent\",\n",
    "        \"totalpop_65andup_percent\",\n",
    "    ]\n",
    "].sort_values(\n",
    "    \"pct_week_15\", ascending=False\n",
    ").head(\n",
    "    10\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Which places have the lowest vaccination coverage right now? "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>pct_week_15</th>\n",
       "      <th>majority_hispanic</th>\n",
       "      <th>totalpop_17andunder_percent</th>\n",
       "      <th>totalpop_65andup_percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>179</th>\n",
       "      <td>Los Angeles - Westlake</td>\n",
       "      <td>22.88</td>\n",
       "      <td>True</td>\n",
       "      <td>0.230150</td>\n",
       "      <td>0.093303</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>Los Angeles - Florence-Firestone</td>\n",
       "      <td>22.81</td>\n",
       "      <td>True</td>\n",
       "      <td>0.319442</td>\n",
       "      <td>0.055256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>169</th>\n",
       "      <td>Los Angeles - Vernon Central</td>\n",
       "      <td>22.51</td>\n",
       "      <td>True</td>\n",
       "      <td>0.323692</td>\n",
       "      <td>0.062574</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>Los Angeles - South Park</td>\n",
       "      <td>22.28</td>\n",
       "      <td>True</td>\n",
       "      <td>0.322496</td>\n",
       "      <td>0.056965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>Los Angeles - Central</td>\n",
       "      <td>22.14</td>\n",
       "      <td>True</td>\n",
       "      <td>0.306173</td>\n",
       "      <td>0.065168</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>Los Angeles - Century Palms/Cove</td>\n",
       "      <td>21.88</td>\n",
       "      <td>True</td>\n",
       "      <td>0.311102</td>\n",
       "      <td>0.073659</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>273</th>\n",
       "      <td>Unincorporated - Littlerock/Pearblossom</td>\n",
       "      <td>21.74</td>\n",
       "      <td>True</td>\n",
       "      <td>0.284108</td>\n",
       "      <td>0.139551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>172</th>\n",
       "      <td>Los Angeles - Watts</td>\n",
       "      <td>21.42</td>\n",
       "      <td>True</td>\n",
       "      <td>0.358358</td>\n",
       "      <td>0.055530</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>168</th>\n",
       "      <td>Los Angeles - Vermont Vista</td>\n",
       "      <td>20.70</td>\n",
       "      <td>True</td>\n",
       "      <td>0.319345</td>\n",
       "      <td>0.067000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>266</th>\n",
       "      <td>Unincorporated - Lake Los Angeles</td>\n",
       "      <td>19.11</td>\n",
       "      <td>True</td>\n",
       "      <td>0.302845</td>\n",
       "      <td>0.114554</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                        name  pct_week_15  majority_hispanic  \\\n",
       "179                   Los Angeles - Westlake        22.88               True   \n",
       "86          Los Angeles - Florence-Firestone        22.81               True   \n",
       "169             Los Angeles - Vernon Central        22.51               True   \n",
       "146                 Los Angeles - South Park        22.28               True   \n",
       "63                     Los Angeles - Central        22.14               True   \n",
       "65          Los Angeles - Century Palms/Cove        21.88               True   \n",
       "273  Unincorporated - Littlerock/Pearblossom        21.74               True   \n",
       "172                      Los Angeles - Watts        21.42               True   \n",
       "168              Los Angeles - Vermont Vista        20.70               True   \n",
       "266        Unincorporated - Lake Los Angeles        19.11               True   \n",
       "\n",
       "     totalpop_17andunder_percent  totalpop_65andup_percent  \n",
       "179                     0.230150                  0.093303  \n",
       "86                      0.319442                  0.055256  \n",
       "169                     0.323692                  0.062574  \n",
       "146                     0.322496                  0.056965  \n",
       "63                      0.306173                  0.065168  \n",
       "65                      0.311102                  0.073659  \n",
       "273                     0.284108                  0.139551  \n",
       "172                     0.358358                  0.055530  \n",
       "168                     0.319345                  0.067000  \n",
       "266                     0.302845                  0.114554  "
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "larger_places[\n",
    "    (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n",
    "][\n",
    "    [\n",
    "        \"name\",\n",
    "        \"pct_week_15\",\n",
    "        \"majority_hispanic\",\n",
    "        \"totalpop_17andunder_percent\",\n",
    "        \"totalpop_65andup_percent\",\n",
    "    ]\n",
    "].sort_values(\n",
    "    \"pct_week_15\", ascending=False\n",
    ").tail(\n",
    "    10\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Merge the places with the largest and smallest increases in vax coverage for graphics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "largest[\"category\"] = \"Largest increase\"\n",
    "smallest[\"category\"] = \"Smallest increase\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "large_small = pd.concat([largest, smallest]).reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "large_small[\"name_clean\"] = (\n",
    "    large_small[\"name\"]\n",
    "    .str.replace(\"City of\", \"\", regex=False)\n",
    "    .str.replace(\"Los Angeles - \", \"\", regex=False)\n",
    "    .str.replace(\"Unincorporated - \", \"\", regex=False)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "large_small_slim = large_small[\n",
    "    [\n",
    "        \"category\",\n",
    "        \"name_clean\",\n",
    "        \"pct_week_9\",\n",
    "        \"pct_week_15\",\n",
    "        \"change_week9_to_15\",\n",
    "    ]\n",
    "].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "large_small_slim.rename(\n",
    "    columns={\n",
    "        \"change_week9_to_15\": \"% change\",\n",
    "        \"pct_week_15\": \"Week 15\",\n",
    "        \"pct_week_9\": \"Week 9\",\n",
    "        \"name_clean\": \"Place\",\n",
    "        \"category\": \"Category\",\n",
    "    },\n",
    "    inplace=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Merge with geography"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [],
   "source": [
    "places_geo = gpd.read_file(\n",
    "    \"input/cities-neighborhoods-unincorporated-la-county-no-islands.geojson\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "places_geo = places_geo.to_crs(\"EPSG:4326\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Clean up places names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "larger_places[\"name_clean\"] = (\n",
    "    larger_places[\"name\"]\n",
    "    .str.strip(\" \")\n",
    "    .str.replace(\"City of\", \"\", regex=False)\n",
    "    .str.replace(\"Los Angeles - \", \"\", regex=False)\n",
    "    .str.replace(\"Unincorporated - \", \"\", regex=False)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_geo = places_geo.merge(larger_places, left_on=\"NAME\", right_on=\"name_clean\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Slim down the geo file and simplify its features for graphics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_geo_slim = df_geo[\n",
    "    [\"name_clean\", \"pct_week_9\", \"pct_week_15\", \"change_week9_to_15\", \"geometry\"]\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_geo_slim[\n",
    "    [\"name_clean\", \"pct_week_9\", \"pct_week_15\", \"change_week9_to_15\", \"geometry\"]\n",
    "].to_file(\"output/vaccine-community/df_geo.geojson\", driver=\"GeoJSON\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_geo_slim[[\"name_clean\", \"change_week9_to_15\"]].to_csv(\n",
    "    \"output/vaccine-community/df_geo_metadata_datawrapper.csv\", index=False\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[o] Wrote output/vaccine-community/df_geo_simple.geojson\n"
     ]
    }
   ],
   "source": [
    "!mapshaper -i output/vaccine-community/df_geo.geojson -simplify percentage=.3 no-repair  -o output/vaccine-community/df_geo_simple.geojson \\"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime as dt\n",
    "\n",
    "today = dt.datetime.today().strftime(\"%m-%d-%Y\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "concatenated_df.to_csv(\n",
    "    \"output/vaccine-community/lac_vax_by_place_raw_table_\" + today + \".csv\",\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_pivot.to_csv(\n",
    "    \"output/vaccine-community/lac_vax_by_place_weeks_\" + today + \".csv\",\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "larger_places[\n",
    "    (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n",
    "].sort_values(\"change_week9_to_15\", ascending=False).head(10).to_csv(\n",
    "    \"output/vaccine-community/largest_vax_coverage_change_\" + today + \".csv\",\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "larger_places[\n",
    "    (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n",
    "].sort_values(\"change_week9_to_15\", ascending=False).tail(10).to_csv(\n",
    "    \"output/vaccine-community/smallest_vax_coverage_change_\" + today + \".csv\",\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "larger_places[\n",
    "    (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n",
    "][[\"name\", \"pct_week_15\", \"total_pop\"]].sort_values(\n",
    "    \"pct_week_15\", ascending=False\n",
    ").tail(\n",
    "    10\n",
    ").to_csv(\n",
    "    \"output/vaccine-community/lowest_coverage_\" + today + \".csv\",\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "larger_places[\n",
    "    (larger_places[\"pct_week_9\"].notnull()) & (larger_places[\"pct_week_15\"].notnull())\n",
    "][[\"name\", \"pct_week_15\", \"total_pop\"]].sort_values(\n",
    "    \"pct_week_15\", ascending=False\n",
    ").head(\n",
    "    10\n",
    ").to_csv(\n",
    "    \"output/vaccine-community/highest_coverage_\" + today + \".csv\",\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "large_small_slim.to_csv(\"output/vaccine-community/large_small_slim.csv\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}