{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you are running on Binder, use the Voilà button, or change the URL from `/notebooks/2-voila.ipynb` to `/voila/render/2-voila.ipynb`."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Back to our motivating example: CO$_2$ at Mauna Loa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import ipywidgets as widgets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib import rcParams\n",
    "# set matplotlib's default font size to 14\n",
    "rcParams[\"font.size\"] = 14"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# path to the monthly in-situ CO2 record, relative to this notebook\n",
    "co2_data_source = \"./data/monthly_in_situ_co2_mlo.csv\"\n",
    "\n",
    "# skip the first 56 rows of the file and read the sentinel -99.99 as NaN\n",
    "co2_data_full = pd.read_csv(\n",
    "    co2_data_source,\n",
    "    skiprows=np.arange(0, 56),\n",
    "    na_values=\"-99.99\",\n",
    ")\n",
    "\n",
    "# overwrite the column labels read from the file with descriptive names\n",
    "co2_data_full.columns = [\n",
    "    \"year\",\n",
    "    \"month\",\n",
    "    \"date (int)\",\n",
    "    \"date\",\n",
    "    \"observed co2\",\n",
    "    \"seasonally adjusted\",\n",
    "    \"fit\",\n",
    "    \"seasonally adjusted fit\",\n",
    "    \"co2 filled\",\n",
    "    \"seasonally adjusted filled\",\n",
    "]\n",
    "\n",
    "# keep only the rows in which every column has a value\n",
    "co2_data = co2_data_full.dropna()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data_between(data=co2_data, date_range=None, data_type=\"seasonally adjusted\"):\n",
    "    \"\"\"\n",
    "    Select the entries of one data column that fall inside a date range.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : table with a \"date\" column and a `data_type` column\n",
    "        (defaults to `co2_data`)\n",
    "    date_range : pair (first date, last date), both ends inclusive; if None,\n",
    "        the full span of `data[\"date\"]` is used\n",
    "    data_type : name of the column to return\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    (dates, values) : the \"date\" entries inside the range and the matching\n",
    "        entries of the `data_type` column\n",
    "    \"\"\"\n",
    "    all_dates = data[\"date\"]\n",
    "\n",
    "    # default to the full extent of the record\n",
    "    if date_range is None:\n",
    "        date_range = all_dates.min(), all_dates.max()\n",
    "    first_date, last_date = date_range[0], date_range[1]\n",
    "\n",
    "    # boolean mask that is True for dates inside [first_date, last_date]\n",
    "    in_range = (all_dates >= first_date) & (all_dates <= last_date)\n",
    "    return all_dates[in_range], data[data_type][in_range]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_co2_data(data=co2_data, date_range=None, data_type=\"seasonally adjusted\", ax=None):\n",
    "    \"\"\"\n",
    "    Plot one data column against date, restricted to a date range.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data, date_range, data_type : forwarded unchanged to `get_data_between`\n",
    "    ax : matplotlib axes to draw on; a new 8 x 5 figure is created if None\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ax : the axes that were drawn on\n",
    "    \"\"\"\n",
    "    # only open a new figure when the caller did not hand us axes\n",
    "    if ax is None:\n",
    "        _, ax = plt.subplots(1, 1, figsize=(8, 5))\n",
    "\n",
    "    dates, values = get_data_between(data, date_range, data_type)\n",
    "\n",
    "    # draw the selected values as unconnected point markers\n",
    "    ax.plot(dates, values, \".\", markersize=8)\n",
    "    ax.grid()\n",
    "    ax.set_xlabel(\"Year\")\n",
    "    ax.set_ylabel(\"CO$_2$ [ppm]\")\n",
    "\n",
    "    return ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_line(dates, slope, intercept, ax=None, label=None):\n",
    "    \"\"\"\n",
    "    Add the line y = slope * (date - earliest date in co2_data) + intercept to a plot.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dates : scalar or array-like of dates at which the line is evaluated\n",
    "    slope : slope of the line\n",
    "    intercept : value of the line at the earliest date in `co2_data`\n",
    "    ax : matplotlib axes to draw on; a new 8 x 5 figure is created if None\n",
    "    label : optional label attached to the plotted line\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ax : the axes that were drawn on\n",
    "    \"\"\"\n",
    "    # create a figure if one isn't supplied\n",
    "    if ax is None:\n",
    "        fig, ax = plt.subplots(1, 1, figsize=(8, 5))\n",
    "\n",
    "    # convert up front so that plain lists / tuples support the arithmetic below\n",
    "    dates = np.asarray(dates)\n",
    "    y = slope * (dates - co2_data[\"date\"].min()) + intercept\n",
    "    ax.plot(dates, y, label=label)\n",
    "\n",
    "    # hand the axes back (as plot_co2_data does) so axes created here stay reachable\n",
    "    return ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_fit_co2_data(slope, intercept, year_min=1958, year_max=2020, data_type=\"seasonally adjusted\"):\n",
    "    \"\"\"\n",
    "    Plot the co2 data between year_min and year_max with a straight line on top.\n",
    "\n",
    "    The line is drawn by `add_line` from `slope` and `intercept` and is\n",
    "    evaluated at year_min and year_max. Returns the axes holding the plot.\n",
    "    \"\"\"\n",
    "    _, ax = plt.subplots(1, 1, figsize=(8, 5))\n",
    "    year_limits = [year_min, year_max]\n",
    "\n",
    "    # data points first, then the line over the same span\n",
    "    plot_co2_data(co2_data, year_limits, data_type, ax=ax)\n",
    "    add_line(np.r_[year_min, year_max], slope, intercept, ax=ax)\n",
    "    return ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_co2(slope, intercept, prediction_date):\n",
    "    \"\"\"\n",
    "    Evaluate the linear model at prediction_date.\n",
    "\n",
    "    The model is measured from the earliest date in `co2_data`, so\n",
    "    `intercept` is the model value at that date.\n",
    "    \"\"\"\n",
    "    elapsed = prediction_date - co2_data[\"date\"].min()\n",
    "    return slope * elapsed + intercept"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def linear_model_co2(\n",
    "    data_type=\"seasonally adjusted\", years=np.r_[1958, 2020],\n",
    "    slope=1, intercept=300, year_predict=2030, show_prediction=False\n",
    "):\n",
    "    \"\"\"\n",
    "    Generate a plot with the co2 data, our linear model and the prediction\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data_type : column of `co2_data` to plot\n",
    "    years : pair (first year, last year) bounding the plotted data and the line\n",
    "    slope, intercept : parameters of the line, as used by `add_line`\n",
    "    year_predict : year at which the model is evaluated\n",
    "    show_prediction : if truthy, mark the predicted value on the plot\n",
    "\n",
    "    NOTE(review): no value is returned. ipywidgets' `interactive` displays\n",
    "    non-None return values, so returning the axes would presumably print\n",
    "    their repr next to the figure -- confirm before adding a return.\n",
    "    \"\"\"\n",
    "    fig, ax = plt.subplots(1, 1, figsize=(8, 5))\n",
    "    plot_co2_data(co2_data, years, data_type, ax=ax)\n",
    "    add_line(years, slope, intercept, ax=ax)\n",
    "\n",
    "    # truthiness test rather than `is True`, so that flags such as\n",
    "    # numpy.bool_(True) or 1 also switch the prediction on\n",
    "    if show_prediction:\n",
    "        prediction = predict_co2(slope, intercept, year_predict)\n",
    "        ax.plot(year_predict, prediction, \"C1o\")\n",
    "        # label sits just left of the marker\n",
    "        ax.text(\n",
    "            year_predict - 1, prediction,\n",
    "            f\"{prediction:1.2f} ppm\", ha=\"right\", va=\"center\"\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Q1:** Within small enough regions, the data follow an approximately linear trend, so a linear model has some predictive power. Out to which year would you trust the model built with the data from 1958 - 1963? Where does it start to break down?\n",
    "\n",
    "**Q2:** How far out would you trust our predictions with data from 2015 - 2020? Would you trust our model to predict CO$_2$ in the year 2050? \n",
    "\n",
    "**Q3:** How might you approach building a model to fit all of our data? \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parameter choices for the widgets\n",
    "year_max = 2050  # maximum value for the sliders\n",
    "years_initial = [1958, 1963]  # years we focus on initially\n",
    "year_predict_initial = 2030\n",
    "\n",
    "# lower bounds for the sliders, taken from the data\n",
    "first_date = co2_data[\"date\"].min()\n",
    "lowest_co2 = co2_data[\"observed co2\"].min()\n",
    "\n",
    "# construct our widget: one control per keyword argument of linear_model_co2\n",
    "w = widgets.interactive(\n",
    "    linear_model_co2,\n",
    "    data_type=widgets.ToggleButtons(\n",
    "        options=[\"observed co2\", \"seasonally adjusted\"],\n",
    "        value=\"seasonally adjusted\",\n",
    "    ),\n",
    "    years=widgets.IntRangeSlider(\n",
    "        min=first_date, max=year_max, value=years_initial\n",
    "    ),\n",
    "    slope=widgets.FloatSlider(min=0, max=5, step=0.1, value=2),\n",
    "    # the intercept slider spans a window around the lowest observed value\n",
    "    intercept=widgets.FloatSlider(\n",
    "        min=lowest_co2 - 5,\n",
    "        max=lowest_co2 + 20,\n",
    "        step=0.25,\n",
    "    ),\n",
    "    year_predict=widgets.IntSlider(\n",
    "        min=first_date, max=year_max, value=year_predict_initial\n",
    "    ),\n",
    "    show_prediction=widgets.Checkbox(value=False),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# show the interactive widget as the output of this cell\n",
    "w"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}