{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Notebook accompanying pbpython article - [Pandas Grouper and Agg Functions Explained](http://pbpython.com/pandas-grouper-agg.html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import collections"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read in the sample sales file then convert the date column to a proper date time column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_excel(\"https://github.com/chris1610/pbpython/blob/master/data/sample-salesv3.xlsx?raw=True\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>account number</th>\n",
       "      <th>name</th>\n",
       "      <th>sku</th>\n",
       "      <th>quantity</th>\n",
       "      <th>unit price</th>\n",
       "      <th>ext price</th>\n",
       "      <th>date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>740150</td>\n",
       "      <td>Barton LLC</td>\n",
       "      <td>B1-20000</td>\n",
       "      <td>39</td>\n",
       "      <td>86.69</td>\n",
       "      <td>3380.91</td>\n",
       "      <td>2014-01-01 07:21:51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>714466</td>\n",
       "      <td>Trantow-Barrows</td>\n",
       "      <td>S2-77896</td>\n",
       "      <td>-1</td>\n",
       "      <td>63.16</td>\n",
       "      <td>-63.16</td>\n",
       "      <td>2014-01-01 10:00:47</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>218895</td>\n",
       "      <td>Kulas Inc</td>\n",
       "      <td>B1-69924</td>\n",
       "      <td>23</td>\n",
       "      <td>90.70</td>\n",
       "      <td>2086.10</td>\n",
       "      <td>2014-01-01 13:24:58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>307599</td>\n",
       "      <td>Kassulke, Ondricka and Metz</td>\n",
       "      <td>S1-65481</td>\n",
       "      <td>41</td>\n",
       "      <td>21.05</td>\n",
       "      <td>863.05</td>\n",
       "      <td>2014-01-01 15:05:22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>412290</td>\n",
       "      <td>Jerde-Hilpert</td>\n",
       "      <td>S2-34077</td>\n",
       "      <td>6</td>\n",
       "      <td>83.21</td>\n",
       "      <td>499.26</td>\n",
       "      <td>2014-01-01 23:26:55</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   account number                         name       sku  quantity  \\\n",
       "0          740150                   Barton LLC  B1-20000        39   \n",
       "1          714466              Trantow-Barrows  S2-77896        -1   \n",
       "2          218895                    Kulas Inc  B1-69924        23   \n",
       "3          307599  Kassulke, Ondricka and Metz  S1-65481        41   \n",
       "4          412290                Jerde-Hilpert  S2-34077         6   \n",
       "\n",
       "   unit price  ext price                date  \n",
       "0       86.69    3380.91 2014-01-01 07:21:51  \n",
       "1       63.16     -63.16 2014-01-01 10:00:47  \n",
       "2       90.70    2086.10 2014-01-01 13:24:58  \n",
       "3       21.05     863.05 2014-01-01 15:05:22  \n",
       "4       83.21     499.26 2014-01-01 23:26:55  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[\"date\"] = pd.to_datetime(df['date'])\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "account number             int64\n",
       "name                      object\n",
       "sku                       object\n",
       "quantity                   int64\n",
       "unit price               float64\n",
       "ext price                float64\n",
       "date              datetime64[ns]\n",
       "dtype: object"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Example showing how resample can be used along with set_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "date\n",
       "2014-01-31    185361.66\n",
       "2014-02-28    146211.62\n",
       "2014-03-31    203921.38\n",
       "2014-04-30    174574.11\n",
       "2014-05-31    165418.55\n",
       "2014-06-30    174089.33\n",
       "2014-07-31    191662.11\n",
       "2014-08-31    153778.59\n",
       "2014-09-30    168443.17\n",
       "2014-10-31    171495.32\n",
       "2014-11-30    119961.22\n",
       "2014-12-31    163867.26\n",
       "Freq: M, Name: ext price, dtype: float64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.set_index('date').resample('M')[\"ext price\"].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A more complex example with a groupby"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "name                             date      \n",
       "Barton LLC                       2014-01-31     6177.57\n",
       "                                 2014-02-28    12218.03\n",
       "                                 2014-03-31     3513.53\n",
       "                                 2014-04-30    11474.20\n",
       "                                 2014-05-31    10220.17\n",
       "                                 2014-06-30    10463.73\n",
       "                                 2014-07-31     6750.48\n",
       "                                 2014-08-31    17541.46\n",
       "                                 2014-09-30    14053.61\n",
       "                                 2014-10-31     9351.68\n",
       "                                 2014-11-30     4901.14\n",
       "                                 2014-12-31     2772.90\n",
       "Cronin, Oberbrunner and Spencer  2014-01-31     1141.75\n",
       "                                 2014-02-28    13976.26\n",
       "                                 2014-03-31    11691.62\n",
       "                                 2014-04-30     3685.44\n",
       "                                 2014-05-31     6760.11\n",
       "                                 2014-06-30     5379.67\n",
       "                                 2014-07-31     6020.30\n",
       "                                 2014-08-31     5399.58\n",
       "Name: ext price, dtype: float64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.set_index('date').groupby('name')[\"ext price\"].resample(\"M\").sum().head(20)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A simpler example using pd.Grouper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "name                             date      \n",
       "Barton LLC                       2014-01-31     6177.57\n",
       "                                 2014-02-28    12218.03\n",
       "                                 2014-03-31     3513.53\n",
       "                                 2014-04-30    11474.20\n",
       "                                 2014-05-31    10220.17\n",
       "                                 2014-06-30    10463.73\n",
       "                                 2014-07-31     6750.48\n",
       "                                 2014-08-31    17541.46\n",
       "                                 2014-09-30    14053.61\n",
       "                                 2014-10-31     9351.68\n",
       "                                 2014-11-30     4901.14\n",
       "                                 2014-12-31     2772.90\n",
       "Cronin, Oberbrunner and Spencer  2014-01-31     1141.75\n",
       "                                 2014-02-28    13976.26\n",
       "                                 2014-03-31    11691.62\n",
       "                                 2014-04-30     3685.44\n",
       "                                 2014-05-31     6760.11\n",
       "                                 2014-06-30     5379.67\n",
       "                                 2014-07-31     6020.30\n",
       "                                 2014-08-31     5399.58\n",
       "Name: ext price, dtype: float64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['name', pd.Grouper(key='date', freq='M')])['ext price'].sum().head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "name                             date      \n",
       "Barton LLC                       2014-12-31    109438.50\n",
       "Cronin, Oberbrunner and Spencer  2014-12-31     89734.55\n",
       "Frami, Hills and Schmidt         2014-12-31    103569.59\n",
       "Fritsch, Russel and Anderson     2014-12-31    112214.71\n",
       "Halvorson, Crona and Champlin    2014-12-31     70004.36\n",
       "Herman LLC                       2014-12-31     82865.00\n",
       "Jerde-Hilpert                    2014-12-31    112591.43\n",
       "Kassulke, Ondricka and Metz      2014-12-31     86451.07\n",
       "Keeling LLC                      2014-12-31    100934.30\n",
       "Kiehn-Spinka                     2014-12-31     99608.77\n",
       "Koepp Ltd                        2014-12-31    103660.54\n",
       "Kuhn-Gusikowski                  2014-12-31     91094.28\n",
       "Kulas Inc                        2014-12-31    137351.96\n",
       "Pollich LLC                      2014-12-31     87347.18\n",
       "Purdy-Kunde                      2014-12-31     77898.21\n",
       "Sanford and Sons                 2014-12-31     98822.98\n",
       "Stokes LLC                       2014-12-31     91535.92\n",
       "Trantow-Barrows                  2014-12-31    123381.38\n",
       "White-Trantow                    2014-12-31    135841.99\n",
       "Will LLC                         2014-12-31    104437.60\n",
       "Name: ext price, dtype: float64"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['name', pd.Grouper(key='date', freq='A-DEC')])['ext price'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This works but is kind of slow and probably not that useful for this data set\n",
    "#df.groupby(['name', pd.Grouper(key='date', freq='60s')])['ext price'].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Some more examples using various off set alisases - http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "name        date      \n",
       "Barton LLC  2014-01-06    3380.91\n",
       "            2014-01-20     224.86\n",
       "            2014-01-27    2423.96\n",
       "            2014-02-03     147.84\n",
       "            2014-02-10     217.14\n",
       "            2014-02-17    8442.09\n",
       "            2014-03-03    3558.80\n",
       "            2014-03-10    1763.49\n",
       "            2014-03-17    1750.04\n",
       "            2014-04-07    1561.14\n",
       "            2014-04-14    4702.61\n",
       "            2014-04-21     836.63\n",
       "            2014-04-28    2540.79\n",
       "            2014-05-05    4109.99\n",
       "            2014-05-12    1671.83\n",
       "            2014-05-26    3251.43\n",
       "            2014-06-02    6112.87\n",
       "            2014-06-09     677.35\n",
       "            2014-06-16    2798.00\n",
       "            2014-06-23    3895.46\n",
       "            2014-07-14    1033.26\n",
       "            2014-07-21    3989.22\n",
       "            2014-08-04    7384.80\n",
       "            2014-08-11      84.18\n",
       "            2014-08-18    3857.10\n",
       "            2014-08-25    2925.94\n",
       "            2014-09-01    5017.44\n",
       "            2014-09-08    1319.28\n",
       "            2014-09-15    2608.50\n",
       "            2014-09-22    7371.67\n",
       "                           ...   \n",
       "Will LLC    2014-03-31    2595.78\n",
       "            2014-04-07     991.99\n",
       "            2014-04-14    2069.20\n",
       "            2014-04-21    3007.03\n",
       "            2014-04-28      26.72\n",
       "            2014-05-12    2220.74\n",
       "            2014-05-19    3250.88\n",
       "            2014-06-02    6385.33\n",
       "            2014-06-16    1440.96\n",
       "            2014-06-23      56.15\n",
       "            2014-06-30     922.41\n",
       "            2014-07-07    2985.85\n",
       "            2014-07-14    3073.44\n",
       "            2014-07-21    3951.27\n",
       "            2014-07-28    1006.98\n",
       "            2014-08-11     773.90\n",
       "            2014-08-18     254.04\n",
       "            2014-08-25     411.88\n",
       "            2014-09-01    2020.57\n",
       "            2014-09-29    2325.42\n",
       "            2014-10-06     371.52\n",
       "            2014-10-20      99.47\n",
       "            2014-10-27    1616.14\n",
       "            2014-11-03    5768.60\n",
       "            2014-11-10    1734.08\n",
       "            2014-12-01     705.96\n",
       "            2014-12-08    4301.00\n",
       "            2014-12-15    3502.09\n",
       "            2014-12-22    2993.40\n",
       "            2015-01-05    1764.72\n",
       "Name: ext price, Length: 805, dtype: float64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['name', pd.Grouper(key='date', freq='W-MON')])['ext price'].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "name                             sku       date      \n",
       "Barton LLC                       B1-04202  2014-12-31     2433.20\n",
       "                                 B1-05914  2014-12-31      324.72\n",
       "                                 B1-20000  2014-12-31     7212.11\n",
       "                                 B1-33087  2014-12-31     4159.97\n",
       "                                 B1-33364  2014-12-31     4369.19\n",
       "                                 B1-38851  2014-12-31     6627.93\n",
       "                                 B1-50809  2014-12-31     5087.44\n",
       "                                 B1-53102  2014-12-31      431.27\n",
       "                                 B1-53636  2014-12-31     3737.59\n",
       "                                 B1-65551  2014-12-31     4703.54\n",
       "                                 B1-69924  2014-12-31     1634.29\n",
       "                                 B1-86481  2014-12-31     1457.80\n",
       "                                 S1-06532  2014-12-31     8346.20\n",
       "                                 S1-27722  2014-12-31     5840.43\n",
       "                                 S1-30248  2014-12-31      887.97\n",
       "                                 S1-47412  2014-12-31     5176.74\n",
       "                                 S1-50961  2014-12-31     1883.31\n",
       "                                 S1-65481  2014-12-31     3352.62\n",
       "                                 S1-82801  2014-12-31     5453.39\n",
       "                                 S1-93683  2014-12-31     1715.28\n",
       "                                 S2-10342  2014-12-31    12337.83\n",
       "                                 S2-11481  2014-12-31      728.50\n",
       "                                 S2-16558  2014-12-31      623.07\n",
       "                                 S2-34077  2014-12-31     5726.16\n",
       "                                 S2-77896  2014-12-31     6156.02\n",
       "                                 S2-78676  2014-12-31     2616.60\n",
       "                                 S2-82423  2014-12-31     3735.43\n",
       "                                 S2-83881  2014-12-31     2679.90\n",
       "Cronin, Oberbrunner and Spencer  B1-04202  2014-12-31     5054.84\n",
       "                                 B1-05914  2014-12-31     4003.64\n",
       "                                                           ...   \n",
       "White-Trantow                    S2-82423  2014-12-31     5863.69\n",
       "                                 S2-83881  2014-12-31     6506.07\n",
       "Will LLC                         B1-04202  2014-12-31      464.33\n",
       "                                 B1-05914  2014-12-31     5935.53\n",
       "                                 B1-20000  2014-12-31     4113.19\n",
       "                                 B1-33087  2014-12-31     3711.06\n",
       "                                 B1-33364  2014-12-31    11599.40\n",
       "                                 B1-38851  2014-12-31     1475.09\n",
       "                                 B1-53102  2014-12-31     6995.58\n",
       "                                 B1-53636  2014-12-31      532.80\n",
       "                                 B1-65551  2014-12-31     3039.01\n",
       "                                 B1-69924  2014-12-31    10021.68\n",
       "                                 B1-86481  2014-12-31     1770.34\n",
       "                                 S1-06532  2014-12-31     3314.72\n",
       "                                 S1-27722  2014-12-31     4493.11\n",
       "                                 S1-30248  2014-12-31      975.52\n",
       "                                 S1-47412  2014-12-31     1642.68\n",
       "                                 S1-50961  2014-12-31     3513.62\n",
       "                                 S1-65481  2014-12-31      593.46\n",
       "                                 S1-82801  2014-12-31     2034.92\n",
       "                                 S1-93683  2014-12-31     9559.57\n",
       "                                 S2-00301  2014-12-31     2117.33\n",
       "                                 S2-10342  2014-12-31     4918.55\n",
       "                                 S2-11481  2014-12-31     4324.79\n",
       "                                 S2-16558  2014-12-31      413.52\n",
       "                                 S2-23246  2014-12-31     1893.25\n",
       "                                 S2-34077  2014-12-31      991.99\n",
       "                                 S2-77896  2014-12-31     5321.87\n",
       "                                 S2-78676  2014-12-31     5016.76\n",
       "                                 S2-83881  2014-12-31     3653.93\n",
       "Name: ext price, Length: 544, dtype: float64"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.groupby(['name', 'sku', pd.Grouper(key='date', freq='A-DEC')])['ext price'].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now show how to use the new .agg function\n",
    "\n",
    "First, how to get summary stats without agg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ext price    2018784.32\n",
       "quantity       36463.00\n",
       "dtype: float64"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[[\"ext price\", \"quantity\"]].sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ext price    1345.856213\n",
       "quantity       24.308667\n",
       "dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[[\"ext price\", \"quantity\"]].mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using .agg for sums and means across multiple columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ext price</th>\n",
       "      <th>quantity</th>\n",
       "      <th>unit price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>sum</th>\n",
       "      <td>2.018784e+06</td>\n",
       "      <td>36463.000000</td>\n",
       "      <td>82511.290000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.345856e+03</td>\n",
       "      <td>24.308667</td>\n",
       "      <td>55.007527</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         ext price      quantity    unit price\n",
       "sum   2.018784e+06  36463.000000  82511.290000\n",
       "mean  1.345856e+03     24.308667     55.007527"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[[\"ext price\", \"quantity\", \"unit price\"]].agg(['sum', 'mean'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Passing a dictionary containing different operations per column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ext price</th>\n",
       "      <th>quantity</th>\n",
       "      <th>unit price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.345856e+03</td>\n",
       "      <td>24.308667</td>\n",
       "      <td>55.007527</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sum</th>\n",
       "      <td>2.018784e+06</td>\n",
       "      <td>36463.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         ext price      quantity  unit price\n",
       "mean  1.345856e+03     24.308667   55.007527\n",
       "sum   2.018784e+06  36463.000000         NaN"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.agg({'ext price': ['sum', 'mean'], 'quantity': ['sum', 'mean'], 'unit price': ['mean']})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using custom functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "get_max = lambda x: x.value_counts(dropna=False).index[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ext price</th>\n",
       "      <th>quantity</th>\n",
       "      <th>unit price</th>\n",
       "      <th>sku</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>&lt;lambda&gt;</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S2-77896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.345856e+03</td>\n",
       "      <td>24.308667</td>\n",
       "      <td>55.007527</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sum</th>\n",
       "      <td>2.018784e+06</td>\n",
       "      <td>36463.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             ext price      quantity  unit price       sku\n",
       "<lambda>           NaN           NaN         NaN  S2-77896\n",
       "mean      1.345856e+03     24.308667   55.007527       NaN\n",
       "sum       2.018784e+06  36463.000000         NaN       NaN"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.agg({'ext price': ['sum', 'mean'], 'quantity': ['sum', 'mean'], 'unit price': ['mean'], 'sku': [get_max]})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Clean up the naming in the output by defining the name for get_max"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "get_max.__name__ = \"most frequent\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ext price</th>\n",
       "      <th>quantity</th>\n",
       "      <th>unit price</th>\n",
       "      <th>sku</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.345856e+03</td>\n",
       "      <td>24.308667</td>\n",
       "      <td>55.007527</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>most frequent</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S2-77896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sum</th>\n",
       "      <td>2.018784e+06</td>\n",
       "      <td>36463.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  ext price      quantity  unit price       sku\n",
       "mean           1.345856e+03     24.308667   55.007527       NaN\n",
       "most frequent           NaN           NaN         NaN  S2-77896\n",
       "sum            2.018784e+06  36463.000000         NaN       NaN"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.agg({'ext price': ['sum', 'mean'], 'quantity': ['sum', 'mean'], 'unit price': ['mean'], 'sku': [get_max]})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Using an OrderedDictionary to maintain column order"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "f = collections.OrderedDict([('ext price', ['sum', 'mean']), ('quantity', ['sum', 'mean']), ('sku', [get_max])])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ext price</th>\n",
       "      <th>quantity</th>\n",
       "      <th>sku</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.345856e+03</td>\n",
       "      <td>24.308667</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>most frequent</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>S2-77896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sum</th>\n",
       "      <td>2.018784e+06</td>\n",
       "      <td>36463.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  ext price      quantity       sku\n",
       "mean           1.345856e+03     24.308667       NaN\n",
       "most frequent           NaN           NaN  S2-77896\n",
       "sum            2.018784e+06  36463.000000       NaN"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.agg(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda env:pbpython3]",
   "language": "python",
   "name": "conda-env-pbpython3-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}