{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": [
     "setup"
    ]
   },
   "source": [
    "(c) 2016 - present. Enplus Advisors, Inc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "tags": [
     "setup"
    ]
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "pd.set_option('display.float_format', '{:,.1f}'.format)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "tags": [
     "setup"
    ]
   },
   "outputs": [],
   "source": [
    "dat = pd.read_csv('data/weather-6m.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": [
     "exercise"
    ]
   },
   "source": [
    "**Exercise:**\n",
    "\n",
    "Calculate the average `air_temp` by `month`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "grp = dat.groupby('month')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "month\n",
       "1   -10.0\n",
       "2    -3.0\n",
       "3     2.1\n",
       "4     7.0\n",
       "5    14.0\n",
       "6    18.1\n",
       "Name: air_temp, dtype: float64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grp['air_temp'].mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": [
     "exercise"
    ]
   },
   "source": [
    "**Exercise:**\n",
    "\n",
    "Compute summary statistics on `air_temp` and `dew_point` using \n",
    "the `describe` method."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"8\" halign=\"left\">air_temp</th>\n",
       "      <th colspan=\"8\" halign=\"left\">dew_point</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "      <th>count</th>\n",
       "      <th>mean</th>\n",
       "      <th>std</th>\n",
       "      <th>min</th>\n",
       "      <th>25%</th>\n",
       "      <th>50%</th>\n",
       "      <th>75%</th>\n",
       "      <th>max</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>month</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>712.0</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>6.2</td>\n",
       "      <td>-29.4</td>\n",
       "      <td>-13.3</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-5.6</td>\n",
       "      <td>2.8</td>\n",
       "      <td>712.0</td>\n",
       "      <td>-14.1</td>\n",
       "      <td>6.8</td>\n",
       "      <td>-32.8</td>\n",
       "      <td>-18.3</td>\n",
       "      <td>-13.9</td>\n",
       "      <td>-8.9</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>644.0</td>\n",
       "      <td>-3.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>-19.4</td>\n",
       "      <td>-7.2</td>\n",
       "      <td>-2.2</td>\n",
       "      <td>1.7</td>\n",
       "      <td>15.0</td>\n",
       "      <td>644.0</td>\n",
       "      <td>-7.3</td>\n",
       "      <td>7.3</td>\n",
       "      <td>-22.8</td>\n",
       "      <td>-12.2</td>\n",
       "      <td>-7.2</td>\n",
       "      <td>-2.2</td>\n",
       "      <td>8.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>713.0</td>\n",
       "      <td>2.1</td>\n",
       "      <td>6.7</td>\n",
       "      <td>-13.3</td>\n",
       "      <td>-1.7</td>\n",
       "      <td>2.2</td>\n",
       "      <td>5.6</td>\n",
       "      <td>22.8</td>\n",
       "      <td>713.0</td>\n",
       "      <td>-3.4</td>\n",
       "      <td>6.1</td>\n",
       "      <td>-17.2</td>\n",
       "      <td>-7.8</td>\n",
       "      <td>-2.8</td>\n",
       "      <td>0.6</td>\n",
       "      <td>13.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>691.0</td>\n",
       "      <td>7.0</td>\n",
       "      <td>6.0</td>\n",
       "      <td>-2.8</td>\n",
       "      <td>2.8</td>\n",
       "      <td>5.6</td>\n",
       "      <td>9.4</td>\n",
       "      <td>28.3</td>\n",
       "      <td>691.0</td>\n",
       "      <td>0.3</td>\n",
       "      <td>6.1</td>\n",
       "      <td>-13.3</td>\n",
       "      <td>-3.9</td>\n",
       "      <td>-1.1</td>\n",
       "      <td>3.9</td>\n",
       "      <td>16.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>713.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>1.1</td>\n",
       "      <td>10.6</td>\n",
       "      <td>13.9</td>\n",
       "      <td>17.2</td>\n",
       "      <td>28.3</td>\n",
       "      <td>713.0</td>\n",
       "      <td>6.2</td>\n",
       "      <td>4.6</td>\n",
       "      <td>-6.1</td>\n",
       "      <td>2.8</td>\n",
       "      <td>6.7</td>\n",
       "      <td>10.0</td>\n",
       "      <td>18.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>688.0</td>\n",
       "      <td>18.1</td>\n",
       "      <td>6.0</td>\n",
       "      <td>3.3</td>\n",
       "      <td>13.8</td>\n",
       "      <td>17.8</td>\n",
       "      <td>22.8</td>\n",
       "      <td>33.3</td>\n",
       "      <td>688.0</td>\n",
       "      <td>12.3</td>\n",
       "      <td>5.5</td>\n",
       "      <td>-3.3</td>\n",
       "      <td>8.9</td>\n",
       "      <td>11.7</td>\n",
       "      <td>17.2</td>\n",
       "      <td>23.3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      air_temp                                       dew_point            \\\n",
       "         count  mean std   min   25%   50%  75%  max     count  mean std   \n",
       "month                                                                      \n",
       "1        712.0 -10.0 6.2 -29.4 -13.3 -10.0 -5.6  2.8     712.0 -14.1 6.8   \n",
       "2        644.0  -3.0 6.8 -19.4  -7.2  -2.2  1.7 15.0     644.0  -7.3 7.3   \n",
       "3        713.0   2.1 6.7 -13.3  -1.7   2.2  5.6 22.8     713.0  -3.4 6.1   \n",
       "4        691.0   7.0 6.0  -2.8   2.8   5.6  9.4 28.3     691.0   0.3 6.1   \n",
       "5        713.0  14.0 5.1   1.1  10.6  13.9 17.2 28.3     713.0   6.2 4.6   \n",
       "6        688.0  18.1 6.0   3.3  13.8  17.8 22.8 33.3     688.0  12.3 5.5   \n",
       "\n",
       "                                   \n",
       "        min   25%   50%  75%  max  \n",
       "month                              \n",
       "1     -32.8 -18.3 -13.9 -8.9  1.0  \n",
       "2     -22.8 -12.2  -7.2 -2.2  8.3  \n",
       "3     -17.2  -7.8  -2.8  0.6 13.3  \n",
       "4     -13.3  -3.9  -1.1  3.9 16.7  \n",
       "5      -6.1   2.8   6.7 10.0 18.3  \n",
       "6      -3.3   8.9  11.7 17.2 23.3  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grp[['air_temp', 'dew_point']].describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": [
     "exercise"
    ]
   },
   "source": [
    "**Exercise:**\n",
    "\n",
    "For January and February and 0 - 11 hours, calculate the average and standard deviation of `air_temp` grouping by month and hour of the day. Name your result columns `air_temp_mean` and `air_temp_sd`.\n",
    "\n",
    "Your result `DataFrame` should have 24 rows, the number of months (2) times the number of hours (12). \n",
    "\n",
    "$2 * 12 = 24$\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "idx = dat.month.isin([1, 2]) & (dat.hour < 12)\n",
    "grp2 = dat[idx].groupby(['month', 'hour'])\n",
    "hourly_temp = grp2.agg(\n",
    "    air_temp_mean=('air_temp', 'mean'),\n",
    "    air_temp_sd=('air_temp', 'std')\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": [
     "exercise"
    ]
   },
   "source": [
    "**Exercise:**\n",
    "\n",
    "By month, calculate quantiles for `air_temp` using the quantiles defined in `breaks`. \n",
    "\n",
    "Hint: Use the `quantile` method defined on a `Series` (`pd.Series.quantile`).\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "tags": [
     "exercise"
    ]
   },
   "outputs": [],
   "source": [
    "breaks = [0.01, 0.25, 0.5, 0.75, 0.99]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>air_temp</th>\n",
       "      <th>0.0</th>\n",
       "      <th>0.2</th>\n",
       "      <th>0.5</th>\n",
       "      <th>0.8</th>\n",
       "      <th>1.0</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>month</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>-25.0</td>\n",
       "      <td>-13.3</td>\n",
       "      <td>-10.0</td>\n",
       "      <td>-5.6</td>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-18.3</td>\n",
       "      <td>-7.2</td>\n",
       "      <td>-2.2</td>\n",
       "      <td>1.7</td>\n",
       "      <td>12.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-11.0</td>\n",
       "      <td>-1.7</td>\n",
       "      <td>2.2</td>\n",
       "      <td>5.6</td>\n",
       "      <td>19.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>-2.2</td>\n",
       "      <td>2.8</td>\n",
       "      <td>5.6</td>\n",
       "      <td>9.4</td>\n",
       "      <td>23.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>2.8</td>\n",
       "      <td>10.6</td>\n",
       "      <td>13.9</td>\n",
       "      <td>17.2</td>\n",
       "      <td>27.8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6.1</td>\n",
       "      <td>13.8</td>\n",
       "      <td>17.8</td>\n",
       "      <td>22.8</td>\n",
       "      <td>32.2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "air_temp   0.0   0.2   0.5  0.8  1.0\n",
       "month                               \n",
       "1        -25.0 -13.3 -10.0 -5.6  1.1\n",
       "2        -18.3  -7.2  -2.2  1.7 12.8\n",
       "3        -11.0  -1.7   2.2  5.6 19.3\n",
       "4         -2.2   2.8   5.6  9.4 23.4\n",
       "5          2.8  10.6  13.9 17.2 27.8\n",
       "6          6.1  13.8  17.8 22.8 32.2"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grp3 = dat.groupby('month')\n",
    "grp3.apply(lambda x: x.air_temp.quantile(breaks))"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Tags",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}