{ "metadata": { "name": "", "signature": "sha256:80e0e9e8e51fa9ae2c41cd51009cfc30acc428830edbe431ec07bc6996461d10" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Resampling a Time Series in pandas\n", "\n", "- **Author:** [Chris Albon](http://www.chrisalbon.com/), [@ChrisAlbon](https://twitter.com/chrisalbon)\n", "- **Date:** -\n", "- **Repo:** [Python 3 code snippets for data science](https://github.com/chrisalbon/code_py)\n", "- **Note:**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import required modules" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "import numpy as np" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 65 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create a dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df = pd.DataFrame()\n", "\n", "df['german_army'] = np.random.randint(low=20000, high=30000, size=100)\n", "df['allied_army'] = np.random.randint(low=20000, high=40000, size=100)\n", "df.index = pd.date_range('1/1/2014', periods=100, freq='H')\n", "\n", "df.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-01-01 00:00:00 21331 39348
2014-01-01 01:00:00 27465 37775
2014-01-01 02:00:00 20310 21410
2014-01-01 03:00:00 22145 25203
2014-01-01 04:00:00 22871 36609
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 66, "text": [ " german_army allied_army\n", "2014-01-01 00:00:00 21331 39348\n", "2014-01-01 01:00:00 27465 37775\n", "2014-01-01 02:00:00 20310 21410\n", "2014-01-01 03:00:00 22145 25203\n", "2014-01-01 04:00:00 22871 36609\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 66 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Truncate the dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.truncate(before='1/2/2014', after='1/3/2014')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-01-02 00:00:00 27290 36896
2014-01-02 01:00:00 26079 31298
2014-01-02 02:00:00 28663 26045
2014-01-02 03:00:00 23328 39686
2014-01-02 04:00:00 25243 24545
2014-01-02 05:00:00 29998 23463
2014-01-02 06:00:00 22779 27992
2014-01-02 07:00:00 25155 33747
2014-01-02 08:00:00 29279 32127
2014-01-02 09:00:00 23138 27684
2014-01-02 10:00:00 24622 24583
2014-01-02 11:00:00 29302 22012
2014-01-02 12:00:00 29941 25643
2014-01-02 13:00:00 28130 35022
2014-01-02 14:00:00 26980 24659
2014-01-02 15:00:00 20435 27753
2014-01-02 16:00:00 29291 25136
2014-01-02 17:00:00 24594 35866
2014-01-02 18:00:00 28721 32888
2014-01-02 19:00:00 23935 25330
2014-01-02 20:00:00 22258 30980
2014-01-02 21:00:00 22660 39378
2014-01-02 22:00:00 24651 32796
2014-01-02 23:00:00 26162 30592
2014-01-03 00:00:00 21352 38357
\n", "

25 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 67, "text": [ " german_army allied_army\n", "2014-01-02 00:00:00 27290 36896\n", "2014-01-02 01:00:00 26079 31298\n", "2014-01-02 02:00:00 28663 26045\n", "2014-01-02 03:00:00 23328 39686\n", "2014-01-02 04:00:00 25243 24545\n", "2014-01-02 05:00:00 29998 23463\n", "2014-01-02 06:00:00 22779 27992\n", "2014-01-02 07:00:00 25155 33747\n", "2014-01-02 08:00:00 29279 32127\n", "2014-01-02 09:00:00 23138 27684\n", "2014-01-02 10:00:00 24622 24583\n", "2014-01-02 11:00:00 29302 22012\n", "2014-01-02 12:00:00 29941 25643\n", "2014-01-02 13:00:00 28130 35022\n", "2014-01-02 14:00:00 26980 24659\n", "2014-01-02 15:00:00 20435 27753\n", "2014-01-02 16:00:00 29291 25136\n", "2014-01-02 17:00:00 24594 35866\n", "2014-01-02 18:00:00 28721 32888\n", "2014-01-02 19:00:00 23935 25330\n", "2014-01-02 20:00:00 22258 30980\n", "2014-01-02 21:00:00 22660 39378\n", "2014-01-02 22:00:00 24651 32796\n", "2014-01-02 23:00:00 26162 30592\n", "2014-01-03 00:00:00 21352 38357\n", "\n", "[25 rows x 2 columns]" ] } ], "prompt_number": 67 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set the dataframe's index" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.index = df.index + pd.DateOffset(months=4, days=5)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 68 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### View the dataframe" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-05-06 00:00:00 21331 39348
2014-05-06 01:00:00 27465 37775
2014-05-06 02:00:00 20310 21410
2014-05-06 03:00:00 22145 25203
2014-05-06 04:00:00 22871 36609
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 69, "text": [ " german_army allied_army\n", "2014-05-06 00:00:00 21331 39348\n", "2014-05-06 01:00:00 27465 37775\n", "2014-05-06 02:00:00 20310 21410\n", "2014-05-06 03:00:00 22145 25203\n", "2014-05-06 04:00:00 22871 36609\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 69 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Lead a variable 1 hour" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.shift(1).head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-05-06 00:00:00 NaN NaN
2014-05-06 01:00:00 21331 39348
2014-05-06 02:00:00 27465 37775
2014-05-06 03:00:00 20310 21410
2014-05-06 04:00:00 22145 25203
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 70, "text": [ " german_army allied_army\n", "2014-05-06 00:00:00 NaN NaN\n", "2014-05-06 01:00:00 21331 39348\n", "2014-05-06 02:00:00 27465 37775\n", "2014-05-06 03:00:00 20310 21410\n", "2014-05-06 04:00:00 22145 25203\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 70 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Lag a variable 1 hour" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.shift(-1).tail()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-05-09 23:00:00 28359 39177
2014-05-10 00:00:00 27881 29686
2014-05-10 01:00:00 26585 26210
2014-05-10 02:00:00 22266 35398
2014-05-10 03:00:00 NaN NaN
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 71, "text": [ " german_army allied_army\n", "2014-05-09 23:00:00 28359 39177\n", "2014-05-10 00:00:00 27881 29686\n", "2014-05-10 01:00:00 26585 26210\n", "2014-05-10 02:00:00 22266 35398\n", "2014-05-10 03:00:00 NaN NaN\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 71 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Aggregate into days by summing up the value of each hourly observation" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.resample('D', how='sum')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-05-06 586399 722666
2014-05-07 622634 716121
2014-05-08 609257 781811
2014-05-09 589086 672985
2014-05-10 105091 130471
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 72, "text": [ " german_army allied_army\n", "2014-05-06 586399 722666\n", "2014-05-07 622634 716121\n", "2014-05-08 609257 781811\n", "2014-05-09 589086 672985\n", "2014-05-10 105091 130471\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 72 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Aggregate into days by averaging up the value of each hourly observation" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.resample('D', how='mean')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-05-06 24433.291667 30111.083333
2014-05-07 25943.083333 29838.375000
2014-05-08 25385.708333 32575.458333
2014-05-09 24545.250000 28041.041667
2014-05-10 26272.750000 32617.750000
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 73, "text": [ " german_army allied_army\n", "2014-05-06 24433.291667 30111.083333\n", "2014-05-07 25943.083333 29838.375000\n", "2014-05-08 25385.708333 32575.458333\n", "2014-05-09 24545.250000 28041.041667\n", "2014-05-10 26272.750000 32617.750000\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 73 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Aggregate into days by taking the min value up the value of each hourly observation" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.resample('D', how='min')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-05-06 20310 20241
2014-05-07 20435 22012
2014-05-08 20006 20266
2014-05-09 20089 20308
2014-05-10 22266 26210
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 74, "text": [ " german_army allied_army\n", "2014-05-06 20310 20241\n", "2014-05-07 20435 22012\n", "2014-05-08 20006 20266\n", "2014-05-09 20089 20308\n", "2014-05-10 22266 26210\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 74 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Aggregate into days by taking the median value of each day's worth of hourly observation" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.resample('D', how='median')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-05-06 23820 29733.0
2014-05-07 25661 29292.0
2014-05-08 25276 32969.0
2014-05-09 24177 26887.5
2014-05-10 27233 32542.0
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 75, "text": [ " german_army allied_army\n", "2014-05-06 23820 29733.0\n", "2014-05-07 25661 29292.0\n", "2014-05-08 25276 32969.0\n", "2014-05-09 24177 26887.5\n", "2014-05-10 27233 32542.0\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 75 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Aggregate into days by taking the first value of each day's worth of hourly observation" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.resample('D', how='first')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-05-06 21331 39348
2014-05-07 27290 36896
2014-05-08 21352 38357
2014-05-09 25561 21868
2014-05-10 28359 39177
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 76, "text": [ " german_army allied_army\n", "2014-05-06 21331 39348\n", "2014-05-07 27290 36896\n", "2014-05-08 21352 38357\n", "2014-05-09 25561 21868\n", "2014-05-10 28359 39177\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 76 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Aggregate into days by taking the last value of each day's worth of hourly observation" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.resample('D', how='last')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
2014-05-06 24976 29632
2014-05-07 26162 30592
2014-05-08 28828 36941
2014-05-09 25573 24839
2014-05-10 22266 35398
\n", "

5 rows \u00d7 2 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 77, "text": [ " german_army allied_army\n", "2014-05-06 24976 29632\n", "2014-05-07 26162 30592\n", "2014-05-08 28828 36941\n", "2014-05-09 25573 24839\n", "2014-05-10 22266 35398\n", "\n", "[5 rows x 2 columns]" ] } ], "prompt_number": 77 }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Aggregate into days by taking the first, last, highest, and lowest value of each day's worth of hourly observation" ] }, { "cell_type": "code", "collapsed": false, "input": [ "df.resample('D', how='ohlc')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
german_armyallied_army
openhighlowcloseopenhighlowclose
2014-05-06 21331 29557 20310 24976 39348 39348 20241 29632
2014-05-07 27290 29998 20435 26162 36896 39686 22012 30592
2014-05-08 21352 29253 20006 28828 38357 39996 20266 36941
2014-05-09 25561 29792 20089 25573 21868 38827 20308 24839
2014-05-10 28359 28359 22266 22266 39177 39177 26210 35398
\n", "

5 rows \u00d7 8 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 78, "text": [ " german_army allied_army \n", " open high low close open high low close\n", "2014-05-06 21331 29557 20310 24976 39348 39348 20241 29632\n", "2014-05-07 27290 29998 20435 26162 36896 39686 22012 30592\n", "2014-05-08 21352 29253 20006 28828 38357 39996 20266 36941\n", "2014-05-09 25561 29792 20089 25573 21868 38827 20308 24839\n", "2014-05-10 28359 28359 22266 22266 39177 39177 26210 35398\n", "\n", "[5 rows x 8 columns]" ] } ], "prompt_number": 78 } ], "metadata": {} } ] }