{ "cells": [ { "cell_type": "markdown", "metadata": { "tags": [ "setup" ] }, "source": [ "(c) 2016 - present. Enplus Advisors, Inc." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "tags": [ "setup" ] }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "pd.set_option('display.float_format', '{:,.1f}'.format)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "tags": [ "setup" ] }, "outputs": [], "source": [ "dat = pd.read_csv('data/weather-6m.csv')" ] }, { "cell_type": "markdown", "metadata": { "tags": [ "exercise" ] }, "source": [ "**Exercise:**\n", "\n", "Calculate the average `air_temp` by `month`." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "grp = dat.groupby('month')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "month\n", "1 -10.0\n", "2 -3.0\n", "3 2.1\n", "4 7.0\n", "5 14.0\n", "6 18.1\n", "Name: air_temp, dtype: float64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grp['air_temp'].mean()" ] }, { "cell_type": "markdown", "metadata": { "tags": [ "exercise" ] }, "source": [ "**Exercise:**\n", "\n", "Compute summary statistics on `air_temp` and `dew_point` using \n", "the `describe` method." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead tr th {\n", " text-align: left;\n", " }\n", "\n", " .dataframe thead tr:last-of-type th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr>\n", " <th></th>\n", " <th colspan=\"8\" halign=\"left\">air_temp</th>\n", " <th colspan=\"8\" halign=\"left\">dew_point</th>\n", " </tr>\n", " <tr>\n", " <th></th>\n", " <th>count</th>\n", " <th>mean</th>\n", " <th>std</th>\n", " <th>min</th>\n", " <th>25%</th>\n", " <th>50%</th>\n", " <th>75%</th>\n", " <th>max</th>\n", " <th>count</th>\n", " <th>mean</th>\n", " <th>std</th>\n", " <th>min</th>\n", " <th>25%</th>\n", " <th>50%</th>\n", " <th>75%</th>\n", " <th>max</th>\n", " </tr>\n", " <tr>\n", " <th>month</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>1</th>\n", " <td>712.0</td>\n", " <td>-10.0</td>\n", " <td>6.2</td>\n", " <td>-29.4</td>\n", " <td>-13.3</td>\n", " <td>-10.0</td>\n", " <td>-5.6</td>\n", " <td>2.8</td>\n", " <td>712.0</td>\n", " <td>-14.1</td>\n", " <td>6.8</td>\n", " <td>-32.8</td>\n", " <td>-18.3</td>\n", " <td>-13.9</td>\n", " <td>-8.9</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>644.0</td>\n", " <td>-3.0</td>\n", " <td>6.8</td>\n", " <td>-19.4</td>\n", " <td>-7.2</td>\n", " <td>-2.2</td>\n", " <td>1.7</td>\n", " <td>15.0</td>\n", " <td>644.0</td>\n", " <td>-7.3</td>\n", " <td>7.3</td>\n", " <td>-22.8</td>\n", " <td>-12.2</td>\n", " <td>-7.2</td>\n", " <td>-2.2</td>\n", " <td>8.3</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>713.0</td>\n", " <td>2.1</td>\n", " <td>6.7</td>\n", " <td>-13.3</td>\n", " <td>-1.7</td>\n", " <td>2.2</td>\n", " <td>5.6</td>\n", " <td>22.8</td>\n", " <td>713.0</td>\n", " <td>-3.4</td>\n", " <td>6.1</td>\n", " <td>-17.2</td>\n", " <td>-7.8</td>\n", " <td>-2.8</td>\n", " <td>0.6</td>\n", " <td>13.3</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>691.0</td>\n", " <td>7.0</td>\n", " <td>6.0</td>\n", " <td>-2.8</td>\n", " <td>2.8</td>\n", " <td>5.6</td>\n", " <td>9.4</td>\n", " <td>28.3</td>\n", " <td>691.0</td>\n", " <td>0.3</td>\n", " <td>6.1</td>\n", " <td>-13.3</td>\n", " <td>-3.9</td>\n", " <td>-1.1</td>\n", " <td>3.9</td>\n", " <td>16.7</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>713.0</td>\n", " <td>14.0</td>\n", " <td>5.1</td>\n", " <td>1.1</td>\n", " <td>10.6</td>\n", " <td>13.9</td>\n", " <td>17.2</td>\n", " <td>28.3</td>\n", " <td>713.0</td>\n", " <td>6.2</td>\n", " <td>4.6</td>\n", " <td>-6.1</td>\n", " <td>2.8</td>\n", " <td>6.7</td>\n", " <td>10.0</td>\n", " <td>18.3</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>688.0</td>\n", " <td>18.1</td>\n", " <td>6.0</td>\n", " <td>3.3</td>\n", " <td>13.8</td>\n", " <td>17.8</td>\n", " <td>22.8</td>\n", " <td>33.3</td>\n", " <td>688.0</td>\n", " <td>12.3</td>\n", " <td>5.5</td>\n", " <td>-3.3</td>\n", " <td>8.9</td>\n", " <td>11.7</td>\n", " <td>17.2</td>\n", " <td>23.3</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " air_temp dew_point \\\n", " count mean std min 25% 50% 75% max count mean std \n", "month \n", "1 712.0 -10.0 6.2 -29.4 -13.3 -10.0 -5.6 2.8 712.0 -14.1 6.8 \n", "2 644.0 -3.0 6.8 -19.4 -7.2 -2.2 1.7 15.0 644.0 -7.3 7.3 \n", "3 713.0 2.1 6.7 -13.3 -1.7 2.2 5.6 22.8 713.0 -3.4 6.1 \n", "4 691.0 7.0 6.0 -2.8 2.8 5.6 9.4 28.3 691.0 0.3 6.1 \n", "5 713.0 14.0 5.1 1.1 10.6 13.9 17.2 28.3 713.0 6.2 4.6 \n", "6 688.0 18.1 6.0 3.3 13.8 17.8 22.8 33.3 688.0 12.3 5.5 \n", "\n", " \n", " min 25% 50% 75% max \n", "month \n", "1 -32.8 -18.3 -13.9 -8.9 1.0 \n", "2 -22.8 -12.2 -7.2 -2.2 8.3 \n", "3 -17.2 -7.8 -2.8 0.6 13.3 \n", "4 -13.3 -3.9 -1.1 3.9 16.7 \n", "5 -6.1 2.8 6.7 10.0 18.3 \n", "6 -3.3 8.9 11.7 17.2 23.3 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grp[['air_temp', 'dew_point']].describe()" ] }, { "cell_type": "markdown", "metadata": { "tags": [ "exercise" ] }, "source": [ "**Exercise:**\n", "\n", "For January and February and 0 - 11 hours, calculate the average and standard deviation of `air_temp` grouping by month and hour of the day. Name your result columns `air_temp_mean` and `air_temp_sd`.\n", "\n", "Your result `DataFrame` should have 24 rows, the number of months (2) times the number of hours (12). \n", "\n", "$2 * 12 = 24$\n", "\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "idx = dat.month.isin([1, 2]) & (dat.hour < 12)\n", "grp2 = dat[idx].groupby(['month', 'hour'])\n", "hourly_temp = grp2.agg(\n", " air_temp_mean=('air_temp', 'mean'),\n", " air_temp_sd=('air_temp', 'std')\n", ")" ] }, { "cell_type": "markdown", "metadata": { "tags": [ "exercise" ] }, "source": [ "**Exercise:**\n", "\n", "By month, calculate quantiles for `air_temp` using the quantiles defined in `breaks`. \n", "\n", "Hint: Use the `quantile` method defined on a `Series` (`pd.Series.quantile`).\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "tags": [ "exercise" ] }, "outputs": [], "source": [ "breaks = [0.01, 0.25, 0.5, 0.75, 0.99]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th>air_temp</th>\n", " <th>0.0</th>\n", " <th>0.2</th>\n", " <th>0.5</th>\n", " <th>0.8</th>\n", " <th>1.0</th>\n", " </tr>\n", " <tr>\n", " <th>month</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>1</th>\n", " <td>-25.0</td>\n", " <td>-13.3</td>\n", " <td>-10.0</td>\n", " <td>-5.6</td>\n", " <td>1.1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>-18.3</td>\n", " <td>-7.2</td>\n", " <td>-2.2</td>\n", " <td>1.7</td>\n", " <td>12.8</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>-11.0</td>\n", " <td>-1.7</td>\n", " <td>2.2</td>\n", " <td>5.6</td>\n", " <td>19.3</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>-2.2</td>\n", " <td>2.8</td>\n", " <td>5.6</td>\n", " <td>9.4</td>\n", " <td>23.4</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>2.8</td>\n", " <td>10.6</td>\n", " <td>13.9</td>\n", " <td>17.2</td>\n", " <td>27.8</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>6.1</td>\n", " <td>13.8</td>\n", " <td>17.8</td>\n", " <td>22.8</td>\n", " <td>32.2</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ "air_temp 0.0 0.2 0.5 0.8 1.0\n", "month \n", "1 -25.0 -13.3 -10.0 -5.6 1.1\n", "2 -18.3 -7.2 -2.2 1.7 12.8\n", "3 -11.0 -1.7 2.2 5.6 19.3\n", "4 -2.2 2.8 5.6 9.4 23.4\n", "5 2.8 10.6 13.9 17.2 27.8\n", "6 6.1 13.8 17.8 22.8 32.2" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grp3 = dat.groupby('month')\n", "grp3.apply(lambda x: x.air_temp.quantile(breaks))" ] } ], "metadata": { "celltoolbar": "Tags", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 1 }