{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "source": [
    "(c) 2016 - present. Enplus Advisors, Inc."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "import datetime as dt\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "pd.set_option('precision', 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "# Sample dataset w/ 5 days of data\n",
    "\n",
    "def make_dataset(n_days=4):\n",
    "    if n_days not in (4, 5):\n",
    "        raise ValueError(f\"n_days must be 4 or 5, got {n_days:d}.\")\n",
    "    \n",
    "    data = {\n",
    "        'date': ['2015-12-28', '2015-12-29', '2015-12-30', \n",
    "                 '2015-12-31', '2016-01-04'],\n",
    "        'goog': [762.51, 776.60, 771.00, 758.88, 741.84],\n",
    "        'aapl': [106.82, 108.74, 107.32, 105.26, 105.35]\n",
    "    }\n",
    "    \n",
    "    n_max = len(data['date'])\n",
    "    slice_ = slice(n_max - n_days, n_max)\n",
    "    \n",
    "    sub = {k: v[slice_] for k,v in data.items()}\n",
    "    dates = sub['date']\n",
    "    n = len(dates)\n",
    "    \n",
    "    # breakpoint()\n",
    "    rv = pd.DataFrame({\n",
    "        'ticker': ['GOOG'] * n + ['AAPL'] * n,\n",
    "        'date': [pd.to_datetime(x) for x in dates] * 2,\n",
    "        'close': sub['goog'] + sub['aapl']\n",
    "    })\n",
    "    \n",
    "    return rv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "def make_long():\n",
    "    day_1 = dt.date(2015, 12, 29)\n",
    "    day_2 = dt.date(2015, 12, 30)\n",
    "    \n",
    "    col_close = 'close'\n",
    "    col_open = 'open'\n",
    "    \n",
    "    cols = ['date', 'ticker', 'variable', 'value']\n",
    "    \n",
    "    rv = pd.DataFrame([\n",
    "      {'ticker': 'GOOG', 'date': day_1, 'variable': col_close, 'value': 776.60},\n",
    "      {'ticker': 'GOOG', 'date': day_2, 'variable': col_close, 'value': 771.00},\n",
    "      {'ticker': 'AAPL', 'date': day_1, 'variable': col_open,  'value': 107.01},\n",
    "      {'ticker': 'AAPL', 'date': day_1, 'variable': col_close, 'value': 105.26}\n",
    "    ], columns=cols)\n",
    "    \n",
    "    return rv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Programming with Data:<br>Advanced Python and Pandas\n",
    "\n",
    "# Advanced Merging & Reshaping"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Grouped and Ordered Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "source": [
    "Working again with securities market data. In quant finance, this is a\n",
    "common data type, daily stock prices."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### Display the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>date</th>\n",
       "      <th>close</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>776.60</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>771.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>758.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>741.84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>108.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>107.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>105.26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>105.35</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker       date   close\n",
       "0   GOOG 2015-12-29  776.60\n",
       "1   GOOG 2015-12-30  771.00\n",
       "2   GOOG 2015-12-31  758.88\n",
       "3   GOOG 2016-01-04  741.84\n",
       "4   AAPL 2015-12-29  108.74\n",
       "5   AAPL 2015-12-30  107.32\n",
       "6   AAPL 2015-12-31  105.26\n",
       "7   AAPL 2016-01-04  105.35"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "_dts = ['2015-12-29', '2015-12-30', '2015-12-31', '2016-01-04']\n",
    "_goog = [776.60, 771.00, 758.88, 741.84]\n",
    "_aapl = [108.74, 107.32, 105.26, 105.35]\n",
    "\n",
    "df = pd.DataFrame({\n",
    "    'ticker': ['GOOG'] * 4 + ['AAPL'] * 4,\n",
    "    'date': [pd.to_datetime(x) for x in _dts] * 2,\n",
    "    'close': _goog + _aapl\n",
    "})\n",
    "df\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### A single, ordered series"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>2.56</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        date  rate\n",
       "0 2015-12-30  2.40\n",
       "1 2016-01-04  2.56"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tbill = pd.DataFrame({\n",
    "    'date': [pd.to_datetime(x) for x in ['2015-12-30', '2016-01-04']],\n",
    "    'rate': [2.40, 2.56]\n",
    "})\n",
    "tbill"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Merge data that is grouped and ordered\n",
    "\n",
    "* Left panel is irregularly spaced, e.g. business days\n",
    "* Right time series also irregularly spaced, e.g. a sparse subset of the first\n",
    "  series"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### How not to do the merge"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "source": [
    "Don't use plain `pd.merge` and fill forward across groups."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>date</th>\n",
       "      <th>close</th>\n",
       "      <th>rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>776.60</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>771.00</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>758.88</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>741.84</td>\n",
       "      <td>2.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>108.74</td>\n",
       "      <td>2.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>107.32</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>105.26</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>105.35</td>\n",
       "      <td>2.56</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker       date   close  rate\n",
       "0   GOOG 2015-12-29  776.60   NaN\n",
       "1   GOOG 2015-12-30  771.00  2.40\n",
       "2   GOOG 2015-12-31  758.88  2.40\n",
       "3   GOOG 2016-01-04  741.84  2.56\n",
       "4   AAPL 2015-12-29  108.74  2.56\n",
       "5   AAPL 2015-12-30  107.32  2.40\n",
       "6   AAPL 2015-12-31  105.26  2.40\n",
       "7   AAPL 2016-01-04  105.35  2.56"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(df, tbill, on='date', how='left').ffill()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### Merge Ordered V2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>date</th>\n",
       "      <th>close</th>\n",
       "      <th>rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>776.60</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>771.00</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>758.88</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>741.84</td>\n",
       "      <td>2.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>108.74</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>107.32</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>105.26</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>105.35</td>\n",
       "      <td>2.56</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker       date   close  rate\n",
       "0   GOOG 2015-12-29  776.60   NaN\n",
       "1   GOOG 2015-12-30  771.00  2.40\n",
       "2   GOOG 2015-12-31  758.88  2.40\n",
       "3   GOOG 2016-01-04  741.84  2.56\n",
       "4   AAPL 2015-12-29  108.74   NaN\n",
       "5   AAPL 2015-12-30  107.32  2.40\n",
       "6   AAPL 2015-12-31  105.26  2.40\n",
       "7   AAPL 2016-01-04  105.35  2.56"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mkt = pd.merge_ordered(df, tbill, on='date', left_by='ticker', fill_method='ffill')\n",
    "mkt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Reshaping & Pivoting"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Wide and Long Formats\n",
    "\n",
    "* Depending on the operation or the data storage location, data stored\n",
    "  in a \"wide\" or \"long\" format"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### Long Format\n",
    "\n",
    "* Common format for data in relational databases because allows\n",
    "  new attributes without a schema change\n",
    "* \"Long\" format is also called \"stacked\" or \"record\" format in the\n",
    "  `pandas` documentation. Also called `Entity-Attribute-Value (EAV)`\n",
    "* \"Sparse\" by design"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "source": [
    "TODO: Include diagram of Long Format"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "def make_long_aapl():\n",
    "    day_1 = dt.date(2015, 12, 29)\n",
    "    day_2 = dt.date(2015, 12, 30)\n",
    "    \n",
    "    col_close = 'close'\n",
    "    col_open = 'open'\n",
    "    \n",
    "    cols = ['date', 'ticker', 'variable', 'value']\n",
    "    \n",
    "    rv = pd.DataFrame([\n",
    "      {'ticker': 'AAPL', 'date': day_1, 'variable': col_open,  'value': 106.96},\n",
    "      {'ticker': 'AAPL', 'date': day_1, 'variable': col_close, 'value': 108.74},\n",
    "      {'ticker': 'AAPL', 'date': day_2, 'variable': col_open,  'value': 108.58},\n",
    "      {'ticker': 'AAPL', 'date': day_2, 'variable': col_close, 'value': 107.32}\n",
    "    ], columns=cols)\n",
    "    \n",
    "    return rv"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### Simplest Long Format\n",
    "\n",
    "* Multiple attributes for a single entity (AAPL)\n",
    "* Row for every period (12/29 & 12/30) x (number of attributes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>variable</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>open</td>\n",
       "      <td>106.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>close</td>\n",
       "      <td>108.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>open</td>\n",
       "      <td>108.58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>close</td>\n",
       "      <td>107.32</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date ticker variable   value\n",
       "0  2015-12-29   AAPL     open  106.96\n",
       "1  2015-12-29   AAPL    close  108.74\n",
       "2  2015-12-30   AAPL     open  108.58\n",
       "3  2015-12-30   AAPL    close  107.32"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aapl_long = make_long_aapl()\n",
    "aapl_long"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### Wide Format\n",
    "\n",
    "* Identifiers stored in the index\n",
    "* Each attribute has its own column\n",
    "* Common format for use by machine learning algorithms"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "source": [
    "TODO: Include diagram of wide format"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### Long-to-Wide"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>variable</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>open</td>\n",
       "      <td>106.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>close</td>\n",
       "      <td>108.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>open</td>\n",
       "      <td>108.58</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>close</td>\n",
       "      <td>107.32</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         date ticker variable   value\n",
       "0  2015-12-29   AAPL     open  106.96\n",
       "1  2015-12-29   AAPL    close  108.74\n",
       "2  2015-12-30   AAPL     open  108.58\n",
       "3  2015-12-30   AAPL    close  107.32"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aapl_long"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>variable</th>\n",
       "      <th>close</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-12-29</th>\n",
       "      <td>108.74</td>\n",
       "      <td>106.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-12-30</th>\n",
       "      <td>107.32</td>\n",
       "      <td>108.58</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "variable     close    open\n",
       "date                      \n",
       "2015-12-29  108.74  106.96\n",
       "2015-12-30  107.32  108.58"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aapl_long.pivot(index='date', columns='variable', values='value')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### Long-to-Wide with multiple ID columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th colspan=\"2\" halign=\"left\">value</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>variable</th>\n",
       "      <th>close</th>\n",
       "      <th>open</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2015-12-29</th>\n",
       "      <th>AAPL</th>\n",
       "      <td>108.74</td>\n",
       "      <td>106.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2015-12-30</th>\n",
       "      <th>AAPL</th>\n",
       "      <td>107.32</td>\n",
       "      <td>108.58</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                    value        \n",
       "variable            close    open\n",
       "date       ticker                \n",
       "2015-12-29 AAPL    108.74  106.96\n",
       "2015-12-30 AAPL    107.32  108.58"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aapl_wide = aapl_long.set_index(['date', 'ticker', 'variable']).unstack()\n",
    "aapl_wide"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### Wide-to-Long"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date</th>\n",
       "      <th>ticker</th>\n",
       "      <th>variable</th>\n",
       "      <th>value</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>close</td>\n",
       "      <td>108.74</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>open</td>\n",
       "      <td>106.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>close</td>\n",
       "      <td>107.32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>AAPL</td>\n",
       "      <td>open</td>\n",
       "      <td>108.58</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        date ticker variable   value\n",
       "0 2015-12-29   AAPL    close  108.74\n",
       "1 2015-12-29   AAPL     open  106.96\n",
       "2 2015-12-30   AAPL    close  107.32\n",
       "3 2015-12-30   AAPL     open  108.58"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aapl_wide.stack().reset_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "## Pivot Tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ticker</th>\n",
       "      <th>date</th>\n",
       "      <th>close</th>\n",
       "      <th>rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>776.60</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>771.00</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>758.88</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>GOOG</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>741.84</td>\n",
       "      <td>2.56</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-29</td>\n",
       "      <td>108.74</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-30</td>\n",
       "      <td>107.32</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2015-12-31</td>\n",
       "      <td>105.26</td>\n",
       "      <td>2.40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>AAPL</td>\n",
       "      <td>2016-01-04</td>\n",
       "      <td>105.35</td>\n",
       "      <td>2.56</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  ticker       date   close  rate\n",
       "0   GOOG 2015-12-29  776.60   NaN\n",
       "1   GOOG 2015-12-30  771.00  2.40\n",
       "2   GOOG 2015-12-31  758.88  2.40\n",
       "3   GOOG 2016-01-04  741.84  2.56\n",
       "4   AAPL 2015-12-29  108.74   NaN\n",
       "5   AAPL 2015-12-30  107.32  2.40\n",
       "6   AAPL 2015-12-31  105.26  2.40\n",
       "7   AAPL 2016-01-04  105.35  2.56"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "mkt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "### Simple Pivot Table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>close</th>\n",
       "      <th>rate</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ticker</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>AAPL</th>\n",
       "      <td>106.67</td>\n",
       "      <td>2.45</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GOOG</th>\n",
       "      <td>762.08</td>\n",
       "      <td>2.45</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         close  rate\n",
       "ticker              \n",
       "AAPL    106.67  2.45\n",
       "GOOG    762.08  2.45"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.pivot_table(mkt, index='ticker', aggfunc='mean')"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": true,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "351px"
   },
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}