{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Johns Hopkins csse_covid_19_daily_reports data is inconsistent\n", "\n", "https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_daily_reports/02-04-2020.csv\n", "\n", "versus\n", "\n", "https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_daily_reports/03-29-2020.csv\n", "\n", "\n", "\n", "Let's assume the columns are `Country_Region,Last_Update,Lat,Long_,Confirmed,Deaths,Recovered,Active`" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "sg = pd.read_csv(\"singapore.csv\", index_col=False, parse_dates=[\"Last_Update\"], names=[\"Country_Region\", \"Last_Update\", \"Lat\", \"Long\", \"Confirmed\", \"Deaths\",\"Recovered\",\"Active\"])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(195, 8)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sg.shape" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
| \n", " | Country_Region | \n", "Last_Update | \n", "Lat | \n", "Long | \n", "Confirmed | \n", "Deaths | \n", "Recovered | \n", "Active | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Singapore | \n", "2020-02-02 01:03:32 | \n", "18.0 | \n", "0.0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 1 | \n", "Singapore | \n", "2020-02-02 01:03:32 | \n", "18.0 | \n", "0.0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 2 | \n", "Singapore | \n", "2020-02-04 15:33:03 | \n", "24.0 | \n", "0.0 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| \n", " | Country_Region | \n", "Last_Update | \n", "Lat | \n", "Long | \n", "Confirmed | \n", "Deaths | \n", "Recovered | \n", "Active | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Singapore | \n", "2020-02-02 01:03:32 | \n", "18.0000 | \n", "0.0000 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 1 | \n", "Singapore | \n", "2020-02-02 01:03:32 | \n", "18.0000 | \n", "0.0000 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 2 | \n", "Singapore | \n", "2020-02-04 15:33:03 | \n", "24.0000 | \n", "0.0000 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 3 | \n", "Singapore | \n", "2020-02-05 16:33:03 | \n", "28.0000 | \n", "0.0000 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 4 | \n", "Singapore | \n", "2020-02-05 16:33:03 | \n", "28.0000 | \n", "0.0000 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 190 | \n", "Singapore | \n", "2020-08-18 04:27:56 | \n", "1.2833 | \n", "103.8333 | \n", "55838 | \n", "27.0 | \n", "52350.0 | \n", "3461.0 | \n", "
| 191 | \n", "Singapore | \n", "2020-08-20 04:27:43 | \n", "1.2833 | \n", "103.8333 | \n", "56031 | \n", "27.0 | \n", "52810.0 | \n", "3194.0 | \n", "
| 192 | \n", "Singapore | \n", "2020-08-21 04:27:41 | \n", "1.2833 | \n", "103.8333 | \n", "56099 | \n", "27.0 | \n", "53119.0 | \n", "2953.0 | \n", "
| 193 | \n", "Singapore | \n", "2020-08-22 04:27:49 | \n", "1.2833 | \n", "103.8333 | \n", "56216 | \n", "27.0 | \n", "53651.0 | \n", "2538.0 | \n", "
| 194 | \n", "Singapore | \n", "2020-08-23 04:27:48 | \n", "1.2833 | \n", "103.8333 | \n", "56266 | \n", "27.0 | \n", "53920.0 | \n", "2319.0 | \n", "
195 rows × 8 columns
\n", "| \n", " | Country_Region | \n", "Last_Update | \n", "Lat | \n", "Long | \n", "Confirmed | \n", "Deaths | \n", "Recovered | \n", "Active | \n", "confirmed_day_before | \n", "daily_increase | \n", "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "Singapore | \n", "2020-02-02 01:03:32 | \n", "18.0000 | \n", "0.0000 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
| 1 | \n", "Singapore | \n", "2020-02-02 01:03:32 | \n", "18.0000 | \n", "0.0000 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "
| 2 | \n", "Singapore | \n", "2020-02-04 15:33:03 | \n", "24.0000 | \n", "0.0000 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "
| 3 | \n", "Singapore | \n", "2020-02-05 16:33:03 | \n", "28.0000 | \n", "0.0000 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "
| 4 | \n", "Singapore | \n", "2020-02-05 16:33:03 | \n", "28.0000 | \n", "0.0000 | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "0.0 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 190 | \n", "Singapore | \n", "2020-08-18 04:27:56 | \n", "1.2833 | \n", "103.8333 | \n", "55838 | \n", "27.0 | \n", "52350.0 | \n", "3461.0 | \n", "55747.0 | \n", "91.0 | \n", "
| 191 | \n", "Singapore | \n", "2020-08-20 04:27:43 | \n", "1.2833 | \n", "103.8333 | \n", "56031 | \n", "27.0 | \n", "52810.0 | \n", "3194.0 | \n", "55838.0 | \n", "193.0 | \n", "
| 192 | \n", "Singapore | \n", "2020-08-21 04:27:41 | \n", "1.2833 | \n", "103.8333 | \n", "56099 | \n", "27.0 | \n", "53119.0 | \n", "2953.0 | \n", "56031.0 | \n", "68.0 | \n", "
| 193 | \n", "Singapore | \n", "2020-08-22 04:27:49 | \n", "1.2833 | \n", "103.8333 | \n", "56216 | \n", "27.0 | \n", "53651.0 | \n", "2538.0 | \n", "56099.0 | \n", "117.0 | \n", "
| 194 | \n", "Singapore | \n", "2020-08-23 04:27:48 | \n", "1.2833 | \n", "103.8333 | \n", "56266 | \n", "27.0 | \n", "53920.0 | \n", "2319.0 | \n", "56216.0 | \n", "50.0 | \n", "
195 rows × 10 columns
\n", "