{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [
 "Basic Predictions\n",
 "===============\n",
 "\n",
 "This notebook tries to predict the number of \"confirmed cases\" from the patientdb available at http://api.covid19india.org/.\n",
 "\n",
 "Growth rate is computed as the rate of change of confirmed cases and smoothed using a 7-day rolling average.\n",
 "It is extrapolated using the following methods:\n",
 "* Quadratic curve fitting - fails to fit the curve correctly\n",
 "* Hyperbolic curve fitting using a neural net - fits well and asymptotes the growth rate at ~7%.\n",
 "\n",
 "The model is rudimentary and points towards a scenario where we fail to contain the epidemic and it eventually moves to stage 3/4." ] },
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
 "from matplotlib import pyplot as plt\n",
 "from matplotlib.ticker import FuncFormatter\n",
 "%matplotlib inline\n",
 "import matplotlib as mpl\n",
 "from matplotlib.dates import date2num, AutoDateLocator, ConciseDateFormatter, DayLocator, AutoDateFormatter\n",
 "\n",
 "import datetime as dt\n",
 "import traceback as tb\n",
 "import pandas as pd\n",
 "import numpy as np\n",
 "import requests\n",
 "\n",
 "from sklearn.neural_network import MLPRegressor" ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Records: 81\n" ] } ], "source": [
 "# download the 'cases_time_series' records; fail loudly on a hung connection or an HTTP error\n",
 "resp = requests.get(\"https://api.covid19india.org/data.json\", timeout=30)\n",
 "resp.raise_for_status()\n",
 "ts = resp.json()['cases_time_series']\n",
 "\n",
 "r = {\n",
 "    \"date\": [],\n",
 "    \"confirmed\": [],\n",
 "    \"deceased\": [],\n",
 "    \"recovered\": []\n",
 "}\n",
 "\n",
 "for d in ts:\n",
 "    try:\n",
 "        # parse every field before appending, so a bad record cannot leave\n",
 "        # the four lists with different lengths\n",
 "        # the feed's date string carries no year, so 2020 is appended before parsing\n",
 "        date = dt.datetime.strptime(d['date'].strip() + \" 2020\", '%d %B %Y')\n",
 "        confirmed = int(d['dailyconfirmed'])\n",
 "        deceased = int(d['dailydeceased'])\n",
 "        recovered = int(d['dailyrecovered'])\n",
 "    except Exception:\n",
 "        # best effort: report the offending record and carry on with the rest\n",
 "        print(d.get('date'))\n",
 "        tb.print_exc()\n",
 "        continue\n",
 "    r['date'].append(date)\n",
 "    r['confirmed'].append(confirmed)\n",
 "    r['deceased'].append(deceased)\n",
 "    r['recovered'].append(recovered)\n",
 "\n",
 "df = pd.DataFrame(data=r, index=r['date'], columns=['confirmed', 'deceased', 'recovered'])\n",
 "df = df.sort_index()\n",
 "df = df[31:]  # drop the first 31 rows (original note: drop Feb data)\n",
 "print(\"Records:\", len(df))" ] },
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
confirmeddeceasedrecoveredtotalgrowthsma_growth
2020-05-1647941204012906465.5840285.370063
2020-05-1750491522538956955.5700205.186083
2020-05-18462813124821003234.8361985.113057
2020-05-19615414630321064776.1341875.270289
2020-05-20571613431131121935.3682965.321048
\n", "
" ], "text/plain": [ " confirmed deceased recovered total growth sma_growth\n", "2020-05-16 4794 120 4012 90646 5.584028 5.370063\n", "2020-05-17 5049 152 2538 95695 5.570020 5.186083\n", "2020-05-18 4628 131 2482 100323 4.836198 5.113057\n", "2020-05-19 6154 146 3032 106477 6.134187 5.270289\n", "2020-05-20 5716 134 3113 112193 5.368296 5.321048" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# running total of confirmed cases\n",
 "df['total'] = df['confirmed'].cumsum()\n",
 "\n",
 "# daily growth rate (%): new cases relative to the running total before that day,\n",
 "# then smoothed with a 7-row simple moving average\n",
 "total_before = df['total'] - df['confirmed']\n",
 "df['growth'] = 100 * df['confirmed'] / total_before\n",
 "sma_gr = df['growth'].rolling(window=7).mean()\n",
 "df['sma_growth'] = sma_gr\n",
 "\n",
 "# plot the daily confirmed counts and show the latest rows\n",
 "ax = df.plot(y='confirmed', kind='line', title=\"Daily cases\", grid=True)\n",
 "df.tail()" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
confirmedtotalsma_growthday_of_yr
2020-05-164794.090646.05.370063137
2020-05-175049.095695.05.186083138
2020-05-184628.0100323.05.113057139
2020-05-196154.0106477.05.270289140
2020-05-205716.0112193.05.321048141
\n", "
" ], "text/plain": [ " confirmed total sma_growth day_of_yr\n", "2020-05-16 4794.0 90646.0 5.370063 137\n", "2020-05-17 5049.0 95695.0 5.186083 138\n", "2020-05-18 4628.0 100323.0 5.113057 139\n", "2020-05-19 6154.0 106477.0 5.270289 140\n", "2020-05-20 5716.0 112193.0 5.321048 141" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# tunables for the extrapolation\n",
 "PROJECTION_PERIODS = 150  # length of the date range that starts at the last observed day\n",
 "FIT_WINDOW_DAYS = 35      # number of most recent rows used for curve fitting\n",
 "\n",
 "# create extrapolation dataframe: observed dates plus the projection range\n",
 "last_date = df.index[-1]\n",
 "future_dates = pd.date_range(start=last_date, periods=PROJECTION_PERIODS, freq=\"1d\")\n",
 "projections = pd.DataFrame(index=df.index.union(future_dates))\n",
 "\n",
 "# fill with orig df (assignment aligns on the index, so dates beyond df stay NaN)\n",
 "projections['confirmed'] = df['confirmed']\n",
 "projections['total'] = df['total']\n",
 "projections['sma_growth'] = df['sma_growth']\n",
 "projections['day_of_yr'] = projections.index.dayofyear\n",
 "\n",
 "# filter data for curve fitting (aka training data): the last FIT_WINDOW_DAYS observed rows\n",
 "end_idx = len(df)\n",
 "start_idx = max(end_idx - FIT_WINDOW_DAYS, 0)  # clamp so a short series cannot produce a negative start\n",
 "fit_df = projections.iloc[start_idx:end_idx]\n",
 "fit_df.tail()" ] },
 { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# fit a 2nd-degree polynomial to the smoothed growth rate and evaluate it on every date\n",
 "coeffs = np.polyfit(fit_df['day_of_yr'], fit_df['sma_growth'], 2)\n",
 "extrapolation_eqn = np.poly1d(coeffs)\n",
 "projections['projected_growth'] = extrapolation_eqn(projections['day_of_yr'])\n",
 "\n",
 "# x axis ticks (mj_f and mn_l are reused by the later plotting cells)\n",
 "mj_l = AutoDateLocator()\n",
 "mj_f = ConciseDateFormatter(mj_l, show_offset=False)\n",
 "mn_l = DayLocator()\n",
 "\n",
 "ax = projections[start_idx:end_idx+5].plot(y='projected_growth', grid=True)\n",
 "_ = projections[start_idx:end_idx+10].plot(y='sma_growth', color='red', grid=True, title=\"Polynomial projections for growth rate\", ax=ax)\n",
 "\n",
 "ax.xaxis.set_major_formatter(mj_f)\n",
 "ax.xaxis.set_minor_locator(mn_l)" ] },
 { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [
 "# train a neural network (one hidden layer of 100 units, logistic activation) from day_of_yr => sma_growth\n",
 "# activation='tanh' is a drop-in alternative for a hyperbolic-tangent fit\n",
 "# random_state pins the weight initialisation so Restart & Run All reproduces the same fit\n",
 "regressor = MLPRegressor(hidden_layer_sizes=(100,), activation='logistic', solver='lbfgs', max_iter=500, random_state=42)\n",
 "_ = regressor.fit(X=fit_df[['day_of_yr']], y=fit_df['sma_growth'])" ] },
 { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# predict the growth rate for every date and plot it against the smoothed observed rate\n",
 "projections['projected_growth_nn'] = regressor.predict(X=projections[['day_of_yr']])\n",
 "\n",
 "plot_window = projections[start_idx:end_idx+150]\n",
 "ax = plot_window.plot(y='projected_growth_nn', grid=True)\n",
 "_ = plot_window.plot(\n",
 "    y='sma_growth',\n",
 "    color='red',\n",
 "    title=\"NeuralNet projections for growth rate\",\n",
 "    ax=ax,\n",
 "    grid=True\n",
 ")\n",
 "\n",
 "ax.xaxis.set_major_formatter(mj_f)\n",
 "ax.xaxis.set_minor_locator(mn_l)\n",
 "ax.tick_params(labeltop=False, labelright=True)" ] },
 { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [
 "def project_totals(totals, growth_pct):\n",
 "    \"\"\"Carry the cumulative total forward over rows that have no observed total.\n",
 "\n",
 "    A row with an observed (non-NaN) total keeps that value. Any other row takes\n",
 "    the previous row's value compounded by its own growth percentage.\n",
 "\n",
 "    Parameters\n",
 "    ----------\n",
 "    totals : iterable of float\n",
 "        Observed totals, NaN where no observation exists.\n",
 "    growth_pct : iterable of float\n",
 "        Growth rate in percent for each row.\n",
 "\n",
 "    Returns\n",
 "    -------\n",
 "    list\n",
 "        One value per input row.\n",
 "    \"\"\"\n",
 "    projected = []\n",
 "    running_total = 0\n",
 "    for total, growth in zip(totals, growth_pct):\n",
 "        if not np.isnan(total):\n",
 "            running_total = total\n",
 "        else:\n",
 "            running_total *= (1 + growth / 100)\n",
 "        projected.append(running_total)\n",
 "    return projected\n",
 "\n",
 "# compute projected totals in one explicit pass; no module-level state is mutated\n",
 "# from inside DataFrame.apply, so the result does not depend on how often or in\n",
 "# which order pandas invokes a callback\n",
 "projections['projected_total'] = pd.Series(\n",
 "    project_totals(projections['total'], projections['projected_growth_nn']),\n",
 "    index=projections.index\n",
 ").astype('int64')" ] },
 { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Date\t Projected Cases\n", "May 31\t \t 193477\n", "June 30 \t 642672\n", "July 31 \t 1599555\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# plot the first 100 rows of the projections frame\n",
 "ax = projections.iloc[0:100].plot(\n",
 "    y='projected_total',\n",
 "    title=\"Projected total cases\",\n",
 "    grid=True,\n",
 "    figsize=(8,3)\n",
 ")\n",
 "ax.xaxis.set_major_formatter(mj_f)\n",
 "ax.xaxis.set_minor_locator(mn_l)\n",
 "# vertical marker at a fixed reference date (18 May 2020)\n",
 "ax.axvline(x=dt.datetime(day=18, month=5, year=2020), color='red')\n",
 "\n",
 "# projected totals at fixed row positions; .iloc makes the positional lookup explicit\n",
 "# (a bare integer key on a date-indexed Series relies on a deprecated positional fallback)\n",
 "print(\"Date\\t Projected Cases\")\n",
 "print(projections.index[91].strftime(\"%B %d\\t\"),\"\\t\", projections['projected_total'].iloc[91])\n",
 "print(projections.index[121].strftime(\"%B %d\"),\"\\t\", projections['projected_total'].iloc[121])\n",
 "print(projections.index[152].strftime(\"%B %d\"),\"\\t\", projections['projected_total'].iloc[152])" ] },
 { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# plot every row after the first 100 (the remainder of the projection)\n",
 "_ = projections.iloc[100:].plot(y='projected_total', title=\"Projected total cases\", grid=True)" ] } ],
 "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }