{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
 "from datascience import *\n",
 "import numpy as np\n",
 "\n",
 "%matplotlib inline\n",
 "import matplotlib.pyplot as plots\n",
 "plots.style.use('fivethirtyeight')"
] }, { "cell_type": "markdown", "metadata": {}, "source": [
 "### Let's apply some of our new skills to the Covid-19 data\n",
 "\n",
 "We'll first process the data just as we did in the last class. Then, we'll sample from all of the counties, and display that subset. We'll also use a loop to create a more informative label for each bubble in the map.\n"
] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
 "# Per-county case counts by date (columns used below: date, county, state, fips, cases, deaths)\n",
 "covid_table = Table.read_table(\"https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv\")\n",
 "# Per-county locations (columns used below: fips, county, state, lat, lon)\n",
 "county_geo = Table.read_table(\"https://raw.githubusercontent.com/jdlafferty/covid-19/master/data/geo-counties.csv\")\n"
] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
 "# Only rows dated strictly after first_date are used.\n",
 "# NOTE: this assumes the 'date' column holds 'YYYY-MM-DD' strings, so that\n",
 "# comparing them as text orders them chronologically.\n",
 "first_date = '2021-02-21'\n",
 "\n",
 "# Some subsets of states to visualize:\n",
 "all_states = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',\n",
 "              'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',\n",
 "              'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas',\n",
 "              'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts',\n",
 "              'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',\n",
 "              'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',\n",
 "              'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma',\n",
 "              'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',\n",
 "              'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',\n",
 "              'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']\n",
 "\n",
 "# Replace all_states with a shorter list of state names to map fewer states\n",
 "states = all_states\n",
 "\n",
 "recent_data = covid_table.where('date', are.above(first_date))\n",
 "recent_state_data = recent_data.where('state', are.contained_in(states))\n"
] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
 "# remove extra columns, keeping only fips and the cumulative case count\n",
 "data = recent_state_data.drop('date', 'county', 'state', 'deaths')\n",
 "\n",
 "# exclude rows where fips is not known\n",
 "data = data.where('fips', are.above(0))\n",
 "\n",
 "# now, group by fips and form a list of the cumulative cases\n",
 "# (the grouped column is named 'cases list'; the lists keep the row order of\n",
 "# the input, which is assumed to be chronological)\n",
 "data = data.group('fips', list)\n",
 "\n",
 "# A county needs at least two cumulative counts to have a day-to-day change.\n",
 "# With only one, np.diff is empty and np.mean of an empty array is nan, which\n",
 "# would end up in the labels and bubble areas, so drop those counties.\n",
 "data = data.with_column('num counts', data.apply(len, 'cases list'))\n",
 "data = data.where('num counts', are.above(1)).drop('num counts')\n",
 "\n",
 "# apply the difference function np.diff to get the new cases\n",
 "data = data.with_column('new cases', data.apply(np.diff, 'cases list'))\n",
 "data = data.drop('cases list')\n",
 "\n",
 "# Now average to get the average daily new cases in each county over all days\n",
 "# after first_date. This is 'the past week' only when first_date is about a\n",
 "# week before the latest date in the data, so update first_date when re-running.\n",
 "# If a county's cumulative count goes down, the average can be negative; it is\n",
 "# clamped at zero because the value is later used as a bubble area.\n",
 "# We add a small amount .001 to avoid zeros, which the graphics handles badly\n",
 "mean_new_cases = np.maximum(data.apply(np.mean, 'new cases'), 0)\n",
 "new_cases = Table().with_columns('fips', data['fips'],\n",
 "                                 'new cases', mean_new_cases + .001)\n"
] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n",
 " \n", " \n", " \n", " \n", " \n",
 " \n", " \n", " \n", " \n", " \n",
 " \n", " \n", " \n", " \n", " \n",
 " \n", " \n", " \n", " \n", " \n",
 " \n", " \n", " \n", " \n", " \n",
 " \n", " \n", " \n", " \n", " \n",
 " \n", " \n", " \n", " \n", " \n",
 " \n", " \n", "
county state lat lon new cases
Autauga Alabama 32.5077 -86.651 25.6484
Baldwin Alabama 30.7698 -87.7827 97.2517
Blount Alabama 34.0128 -86.5337 23.7723
Bullock Alabama 32.0927 -85.7129 3.0864
Butler Alabama 32.0894 -88.2213 8.47207
Calhoun Alabama 33.7623 -85.8421 50.7338
Chambers Alabama 32.9188 -85.3938 13.8881
Cherokee Alabama 34.7555 -87.9734 9.08364
Chilton Alabama 32.866 -86.6652 18.4087
Choctaw Alabama 32.004 -88.2858 3.99274
\n", "

... (1631 rows omitted)

" ], "text/plain": [ "county | state | lat | lon | new cases\n", "Autauga | Alabama | 32.5077 | -86.651 | 25.6484\n", "Baldwin | Alabama | 30.7698 | -87.7827 | 97.2517\n", "Blount | Alabama | 34.0128 | -86.5337 | 23.7723\n", "Bullock | Alabama | 32.0927 | -85.7129 | 3.0864\n", "Butler | Alabama | 32.0894 | -88.2213 | 8.47207\n", "Calhoun | Alabama | 33.7623 | -85.8421 | 50.7338\n", "Chambers | Alabama | 32.9188 | -85.3938 | 13.8881\n", "Cherokee | Alabama | 34.7555 | -87.9734 | 9.08364\n", "Chilton | Alabama | 32.866 | -86.6652 | 18.4087\n", "Choctaw | Alabama | 32.004 | -88.2858 | 3.99274\n", "... (1631 rows omitted)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Locations of the counties in the selected states, ordered by fips\n",
 "state_geo = county_geo.where('state', are.contained_in(states)).sort('fips')\n",
 "\n",
 "# Attach each county's average new cases by matching rows on fips.\n",
 "# fips is only needed for the match, so it is dropped from the result.\n",
 "new_cases_geo = state_geo.join('fips', new_cases).drop('fips')\n",
 "new_cases_geo"
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Make this Notebook Trusted to load map: File -> Trust Notebook
" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "n = new_cases_geo.num_rows\n",
 "\n",
 "# A random sample of (up to) 100 counties across the US.\n",
 "# With replace=False, np.random.choice raises a ValueError when asked for more\n",
 "# rows than exist, which happens once `states` is narrowed to a few states,\n",
 "# so never ask for more than the n rows available.\n",
 "sample_size = min(100, n)\n",
 "rows = np.random.choice(np.arange(n), sample_size, replace=False)\n",
 "sample = new_cases_geo.take(rows)\n",
 "\n",
 "# Label each bubble with its county, state and rounded average new cases\n",
 "labels = []\n",
 "for i in np.arange(sample.num_rows):\n",
 "    s = sample['county'][i] + \" County, \" + \\\n",
 "        sample['state'][i] + \": \" + \\\n",
 "        str(np.round(sample['new cases'][i], 1))\n",
 "    labels.append(s)\n",
 "\n",
 "# One row per bubble; 'areas' is 10 times the county's average new cases\n",
 "dat = Table().with_columns('lat', sample['lat'],\n",
 "                           'long', sample['lon'],\n",
 "                           'labels', labels,\n",
 "                           'areas', 10*sample['new cases'],\n",
 "                           'colors', 'red')\n",
 "Circle.map_table(dat, weight=1)"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can check our results by comparing to the *Times* numbers [reported here](https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html#states)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }