{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "(c) 2016 - present. Enplus Advisors, Inc." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "import datetime as dt\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "pd.set_option('precision', 2)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# Sample dataset w/ 5 days of data\n", "\n", "def make_dataset(n_days=4):\n", " if n_days not in (4, 5):\n", " raise ValueError(f\"n_days must be 4 or 5, got {n_days:d}.\")\n", " \n", " data = {\n", " 'date': ['2015-12-28', '2015-12-29', '2015-12-30', \n", " '2015-12-31', '2016-01-04'],\n", " 'goog': [762.51, 776.60, 771.00, 758.88, 741.84],\n", " 'aapl': [106.82, 108.74, 107.32, 105.26, 105.35]\n", " }\n", " \n", " n_max = len(data['date'])\n", " slice_ = slice(n_max - n_days, n_max)\n", " \n", " sub = {k: v[slice_] for k,v in data.items()}\n", " dates = sub['date']\n", " n = len(dates)\n", " \n", " # breakpoint()\n", " rv = pd.DataFrame({\n", " 'ticker': ['GOOG'] * n + ['AAPL'] * n,\n", " 'date': [pd.to_datetime(x) for x in dates] * 2,\n", " 'close': sub['goog'] + sub['aapl']\n", " })\n", " \n", " return rv" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "def make_long():\n", " day_1 = dt.date(2015, 12, 29)\n", " day_2 = dt.date(2015, 12, 30)\n", " \n", " col_close = 'close'\n", " col_open = 'open'\n", " \n", " cols = ['date', 'ticker', 'variable', 'value']\n", " \n", " rv = pd.DataFrame([\n", " {'ticker': 'GOOG', 'date': day_1, 'variable': col_close, 'value': 776.60},\n", " {'ticker': 'GOOG', 'date': day_2, 'variable': col_close, 'value': 771.00},\n", " {'ticker': 'AAPL', 'date': day_1, 'variable': col_open, 'value': 107.01},\n", " {'ticker': 'AAPL', 'date': day_1, 'variable': col_close, 'value': 105.26}\n", " ], columns=cols)\n", " \n", " return rv" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Programming with Data:<br>Advanced Python and Pandas\n", "\n", "# Advanced Merging & Reshaping" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Grouped and Ordered Data" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "Working again with securities market data. In quant finance, this is a\n", "common data type, daily stock prices." ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Display the data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>ticker</th>\n", " <th>date</th>\n", " <th>close</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>GOOG</td>\n", " <td>2015-12-29</td>\n", " <td>776.60</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>GOOG</td>\n", " <td>2015-12-30</td>\n", " <td>771.00</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>GOOG</td>\n", " <td>2015-12-31</td>\n", " <td>758.88</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>GOOG</td>\n", " <td>2016-01-04</td>\n", " <td>741.84</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>AAPL</td>\n", " <td>2015-12-29</td>\n", " <td>108.74</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>AAPL</td>\n", " <td>2015-12-30</td>\n", " <td>107.32</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>AAPL</td>\n", " <td>2015-12-31</td>\n", " <td>105.26</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>AAPL</td>\n", " <td>2016-01-04</td>\n", " <td>105.35</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " ticker date close\n", "0 GOOG 2015-12-29 776.60\n", "1 GOOG 2015-12-30 771.00\n", "2 GOOG 2015-12-31 758.88\n", "3 GOOG 2016-01-04 741.84\n", "4 AAPL 2015-12-29 108.74\n", "5 AAPL 2015-12-30 107.32\n", "6 AAPL 2015-12-31 105.26\n", "7 AAPL 2016-01-04 105.35" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "_dts = ['2015-12-29', '2015-12-30', '2015-12-31', '2016-01-04']\n", "_goog = [776.60, 771.00, 758.88, 741.84]\n", "_aapl = [108.74, 107.32, 105.26, 105.35]\n", "\n", "df = pd.DataFrame({\n", " 'ticker': ['GOOG'] * 4 + ['AAPL'] * 4,\n", " 'date': [pd.to_datetime(x) for x in _dts] * 2,\n", " 'close': _goog + _aapl\n", "})\n", "df\n" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### A single, ordered series" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>rate</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2015-12-30</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2016-01-04</td>\n", " <td>2.56</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " date rate\n", "0 2015-12-30 2.40\n", "1 2016-01-04 2.56" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tbill = pd.DataFrame({\n", " 'date': [pd.to_datetime(x) for x in ['2015-12-30', '2016-01-04']],\n", " 'rate': [2.40, 2.56]\n", "})\n", "tbill" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Merge data that is grouped and ordered\n", "\n", "* Left panel is irregularly spaced, e.g. business days\n", "* Right time series also irregularly spaced, e.g. a sparse subset of the first\n", " series" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### How not to do the merge" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "Don't use plain `pd.merge` and fill forward across groups." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>ticker</th>\n", " <th>date</th>\n", " <th>close</th>\n", " <th>rate</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>GOOG</td>\n", " <td>2015-12-29</td>\n", " <td>776.60</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>GOOG</td>\n", " <td>2015-12-30</td>\n", " <td>771.00</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>GOOG</td>\n", " <td>2015-12-31</td>\n", " <td>758.88</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>GOOG</td>\n", " <td>2016-01-04</td>\n", " <td>741.84</td>\n", " <td>2.56</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>AAPL</td>\n", " <td>2015-12-29</td>\n", " <td>108.74</td>\n", " <td>2.56</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>AAPL</td>\n", " <td>2015-12-30</td>\n", " <td>107.32</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>AAPL</td>\n", " <td>2015-12-31</td>\n", " <td>105.26</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>AAPL</td>\n", " <td>2016-01-04</td>\n", " <td>105.35</td>\n", " <td>2.56</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " ticker date close rate\n", "0 GOOG 2015-12-29 776.60 NaN\n", "1 GOOG 2015-12-30 771.00 2.40\n", "2 GOOG 2015-12-31 758.88 2.40\n", "3 GOOG 2016-01-04 741.84 2.56\n", "4 AAPL 2015-12-29 108.74 2.56\n", "5 AAPL 2015-12-30 107.32 2.40\n", "6 AAPL 2015-12-31 105.26 2.40\n", "7 AAPL 2016-01-04 105.35 2.56" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.merge(df, tbill, on='date', how='left').ffill()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Merge Ordered V2" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>ticker</th>\n", " <th>date</th>\n", " <th>close</th>\n", " <th>rate</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>GOOG</td>\n", " <td>2015-12-29</td>\n", " <td>776.60</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>GOOG</td>\n", " <td>2015-12-30</td>\n", " <td>771.00</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>GOOG</td>\n", " <td>2015-12-31</td>\n", " <td>758.88</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>GOOG</td>\n", " <td>2016-01-04</td>\n", " <td>741.84</td>\n", " <td>2.56</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>AAPL</td>\n", " <td>2015-12-29</td>\n", " <td>108.74</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>AAPL</td>\n", " <td>2015-12-30</td>\n", " <td>107.32</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>AAPL</td>\n", " <td>2015-12-31</td>\n", " <td>105.26</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>AAPL</td>\n", " <td>2016-01-04</td>\n", " <td>105.35</td>\n", " <td>2.56</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " ticker date close rate\n", "0 GOOG 2015-12-29 776.60 NaN\n", "1 GOOG 2015-12-30 771.00 2.40\n", "2 GOOG 2015-12-31 758.88 2.40\n", "3 GOOG 2016-01-04 741.84 2.56\n", "4 AAPL 2015-12-29 108.74 NaN\n", "5 AAPL 2015-12-30 107.32 2.40\n", "6 AAPL 2015-12-31 105.26 2.40\n", "7 AAPL 2016-01-04 105.35 2.56" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mkt = pd.merge_ordered(df, tbill, on='date', left_by='ticker', fill_method='ffill')\n", "mkt" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Reshaping & Pivoting" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Wide and Long Formats\n", "\n", "* Depending on the operation or the data storage location, data stored\n", " in a \"wide\" or \"long\" format" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Long Format\n", "\n", "* Common format for data in relational databases because allows\n", " new attributes without a schema change\n", "* \"Long\" format is also called \"stacked\" or \"record\" format in the\n", " `pandas` documentation. Also called `Entity-Attribute-Value (EAV)`\n", "* \"Sparse\" by design" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "TODO: Include diagram of Long Format" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "def make_long_aapl():\n", " day_1 = dt.date(2015, 12, 29)\n", " day_2 = dt.date(2015, 12, 30)\n", " \n", " col_close = 'close'\n", " col_open = 'open'\n", " \n", " cols = ['date', 'ticker', 'variable', 'value']\n", " \n", " rv = pd.DataFrame([\n", " {'ticker': 'AAPL', 'date': day_1, 'variable': col_open, 'value': 106.96},\n", " {'ticker': 'AAPL', 'date': day_1, 'variable': col_close, 'value': 108.74},\n", " {'ticker': 'AAPL', 'date': day_2, 'variable': col_open, 'value': 108.58},\n", " {'ticker': 'AAPL', 'date': day_2, 'variable': col_close, 'value': 107.32}\n", " ], columns=cols)\n", " \n", " return rv" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Simplest Long Format\n", "\n", "* Multiple attributes for a single entity (AAPL)\n", "* Row for every period (12/29 & 12/30) x (number of attributes)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>ticker</th>\n", " <th>variable</th>\n", " <th>value</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2015-12-29</td>\n", " <td>AAPL</td>\n", " <td>open</td>\n", " <td>106.96</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2015-12-29</td>\n", " <td>AAPL</td>\n", " <td>close</td>\n", " <td>108.74</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2015-12-30</td>\n", " <td>AAPL</td>\n", " <td>open</td>\n", " <td>108.58</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2015-12-30</td>\n", " <td>AAPL</td>\n", " <td>close</td>\n", " <td>107.32</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " date ticker variable value\n", "0 2015-12-29 AAPL open 106.96\n", "1 2015-12-29 AAPL close 108.74\n", "2 2015-12-30 AAPL open 108.58\n", "3 2015-12-30 AAPL close 107.32" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "aapl_long = make_long_aapl()\n", "aapl_long" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Wide Format\n", "\n", "* Identifiers stored in the index\n", "* Each attribute has its own column\n", "* Common format for use by machine learning algorithms" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "TODO: Include diagram of wide format" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Long-to-Wide" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>ticker</th>\n", " <th>variable</th>\n", " <th>value</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2015-12-29</td>\n", " <td>AAPL</td>\n", " <td>open</td>\n", " <td>106.96</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2015-12-29</td>\n", " <td>AAPL</td>\n", " <td>close</td>\n", " <td>108.74</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2015-12-30</td>\n", " <td>AAPL</td>\n", " <td>open</td>\n", " <td>108.58</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2015-12-30</td>\n", " <td>AAPL</td>\n", " <td>close</td>\n", " <td>107.32</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " date ticker variable value\n", "0 2015-12-29 AAPL open 106.96\n", "1 2015-12-29 AAPL close 108.74\n", "2 2015-12-30 AAPL open 108.58\n", "3 2015-12-30 AAPL close 107.32" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "aapl_long" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th>variable</th>\n", " <th>close</th>\n", " <th>open</th>\n", " </tr>\n", " <tr>\n", " <th>date</th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2015-12-29</th>\n", " <td>108.74</td>\n", " <td>106.96</td>\n", " </tr>\n", " <tr>\n", " <th>2015-12-30</th>\n", " <td>107.32</td>\n", " <td>108.58</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ "variable close open\n", "date \n", "2015-12-29 108.74 106.96\n", "2015-12-30 107.32 108.58" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "aapl_long.pivot(index='date', columns='variable', values='value')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Long-to-Wide with multiple ID columns" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead tr th {\n", " text-align: left;\n", " }\n", "\n", " .dataframe thead tr:last-of-type th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr>\n", " <th></th>\n", " <th></th>\n", " <th colspan=\"2\" halign=\"left\">value</th>\n", " </tr>\n", " <tr>\n", " <th></th>\n", " <th>variable</th>\n", " <th>close</th>\n", " <th>open</th>\n", " </tr>\n", " <tr>\n", " <th>date</th>\n", " <th>ticker</th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2015-12-29</th>\n", " <th>AAPL</th>\n", " <td>108.74</td>\n", " <td>106.96</td>\n", " </tr>\n", " <tr>\n", " <th>2015-12-30</th>\n", " <th>AAPL</th>\n", " <td>107.32</td>\n", " <td>108.58</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " value \n", "variable close open\n", "date ticker \n", "2015-12-29 AAPL 108.74 106.96\n", "2015-12-30 AAPL 107.32 108.58" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "aapl_wide = aapl_long.set_index(['date', 'ticker', 'variable']).unstack()\n", "aapl_wide" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Wide-to-Long" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>date</th>\n", " <th>ticker</th>\n", " <th>variable</th>\n", " <th>value</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2015-12-29</td>\n", " <td>AAPL</td>\n", " <td>close</td>\n", " <td>108.74</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2015-12-29</td>\n", " <td>AAPL</td>\n", " <td>open</td>\n", " <td>106.96</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2015-12-30</td>\n", " <td>AAPL</td>\n", " <td>close</td>\n", " <td>107.32</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2015-12-30</td>\n", " <td>AAPL</td>\n", " <td>open</td>\n", " <td>108.58</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " date ticker variable value\n", "0 2015-12-29 AAPL close 108.74\n", "1 2015-12-29 AAPL open 106.96\n", "2 2015-12-30 AAPL close 107.32\n", "3 2015-12-30 AAPL open 108.58" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "aapl_wide.stack().reset_index()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Pivot Tables" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>ticker</th>\n", " <th>date</th>\n", " <th>close</th>\n", " <th>rate</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>GOOG</td>\n", " <td>2015-12-29</td>\n", " <td>776.60</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>GOOG</td>\n", " <td>2015-12-30</td>\n", " <td>771.00</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>GOOG</td>\n", " <td>2015-12-31</td>\n", " <td>758.88</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>GOOG</td>\n", " <td>2016-01-04</td>\n", " <td>741.84</td>\n", " <td>2.56</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>AAPL</td>\n", " <td>2015-12-29</td>\n", " <td>108.74</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>AAPL</td>\n", " <td>2015-12-30</td>\n", " <td>107.32</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>AAPL</td>\n", " <td>2015-12-31</td>\n", " <td>105.26</td>\n", " <td>2.40</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>AAPL</td>\n", " <td>2016-01-04</td>\n", " <td>105.35</td>\n", " <td>2.56</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " ticker date close rate\n", "0 GOOG 2015-12-29 776.60 NaN\n", "1 GOOG 2015-12-30 771.00 2.40\n", "2 GOOG 2015-12-31 758.88 2.40\n", "3 GOOG 2016-01-04 741.84 2.56\n", "4 AAPL 2015-12-29 108.74 NaN\n", "5 AAPL 2015-12-30 107.32 2.40\n", "6 AAPL 2015-12-31 105.26 2.40\n", "7 AAPL 2016-01-04 105.35 2.56" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mkt" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Simple Pivot Table" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>close</th>\n", " <th>rate</th>\n", " </tr>\n", " <tr>\n", " <th>ticker</th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>AAPL</th>\n", " <td>106.67</td>\n", " <td>2.45</td>\n", " </tr>\n", " <tr>\n", " <th>GOOG</th>\n", " <td>762.08</td>\n", " <td>2.45</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " close rate\n", "ticker \n", "AAPL 106.67 2.45\n", "GOOG 762.08 2.45" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.pivot_table(mkt, index='ticker', aggfunc='mean')" ] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": true, "skip_h1_title": true, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "351px" }, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 1 }