{ "cells": [ { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Time Series" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "np.random.seed(12345)\n", "import matplotlib.pyplot as plt\n", "plt.rc('figure', figsize=(10, 6))\n", "PREVIOUS_MAX_ROWS = pd.options.display.max_rows\n", "pd.options.display.max_rows = 20\n", "np.set_printoptions(precision=4, suppress=True)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Date and Time Data Types and Tools" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from datetime import datetime\n", "now = datetime.now()\n", "now\n", "now.year, now.month, now.day" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)\n", "delta\n", "delta.days\n", "delta.seconds" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from datetime import timedelta\n", "start = datetime(2011, 1, 7)\n", "start + timedelta(12)\n", "start - 2 * timedelta(12)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Converting Between String and Datetime" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "stamp = datetime(2011, 1, 3)\n", "str(stamp)\n", "stamp.strftime('%Y-%m-%d')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "value = '2011-01-03'\n", "datetime.strptime(value, '%Y-%m-%d')\n", "datestrs = ['7/6/2011', '8/6/2011']\n", "[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from dateutil.parser import parse\n", "parse('2011-01-03')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "parse('Jan 31, 1997 10:45 PM')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "parse('6/12/2011', dayfirst=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']\n", "pd.to_datetime(datestrs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "idx = pd.to_datetime(datestrs + [None])\n", "idx\n", "idx[2]\n", "pd.isnull(idx)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Time Series Basics" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from datetime import datetime\n", "dates = [datetime(2011, 1, 2), datetime(2011, 1, 5),\n", " datetime(2011, 1, 7), datetime(2011, 1, 8),\n", " datetime(2011, 1, 10), datetime(2011, 1, 12)]\n", "ts = pd.Series(np.random.randn(6), index=dates)\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts + ts[::2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.index.dtype" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "stamp = ts.index[0]\n", "stamp" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Indexing, Selection, Subsetting" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "stamp = ts.index[2]\n", "ts[stamp]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts['1/10/2011']\n", "ts['20110110']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "longer_ts = pd.Series(np.random.randn(1000),\n", " index=pd.date_range('1/1/2000', periods=1000))\n", "longer_ts\n", "longer_ts['2001']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "longer_ts['2001-05']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts[datetime(2011, 1, 7):]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts\n", "ts['1/6/2011':'1/11/2011']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.truncate(after='1/9/2011')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')\n", "long_df = pd.DataFrame(np.random.randn(100, 4),\n", " index=dates,\n", " columns=['Colorado', 'Texas',\n", " 'New York', 'Ohio'])\n", "long_df.loc['5-2001']" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Time Series with Duplicate Indices" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000',\n", " '1/2/2000', '1/3/2000'])\n", "dup_ts = pd.Series(np.arange(5), index=dates)\n", "dup_ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "dup_ts.index.is_unique" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "dup_ts['1/3/2000'] # not duplicated\n", "dup_ts['1/2/2000'] # duplicated" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "grouped = dup_ts.groupby(level=0)\n", "grouped.mean()\n", "grouped.count()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Date Ranges, Frequencies, and Shifting" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts\n", "resampler = ts.resample('D')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Generating Date Ranges" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "index = pd.date_range('2012-04-01', '2012-06-01')\n", "index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.date_range(start='2012-04-01', periods=20)\n", "pd.date_range(end='2012-06-01', periods=20)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.date_range('2000-01-01', '2000-12-01', freq='BM')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.date_range('2012-05-02 12:56:31', periods=5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.date_range('2012-05-02 12:56:31', periods=5, normalize=True)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Frequencies and Date Offsets" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from pandas.tseries.offsets import Hour, Minute\n", "hour = Hour()\n", "hour" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "four_hours = Hour(4)\n", "four_hours" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.date_range('2000-01-01', '2000-01-03 23:59', freq='4h')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "Hour(2) + Minute(30)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.date_range('2000-01-01', periods=10, freq='1h30min')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Week of month dates" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "rng = pd.date_range('2012-01-01', '2012-09-01', freq='WOM-3FRI')\n", "list(rng)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Shifting (Leading and Lagging) Data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts = pd.Series(np.random.randn(4),\n", " index=pd.date_range('1/1/2000', periods=4, freq='M'))\n", "ts\n", "ts.shift(2)\n", "ts.shift(-2)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "ts / ts.shift(1) - 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.shift(2, freq='M')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.shift(3, freq='D')\n", "ts.shift(1, freq='90T')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Shifting dates with offsets" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from pandas.tseries.offsets import Day, MonthEnd\n", "now = datetime(2011, 11, 17)\n", "now + 3 * Day()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "now + MonthEnd()\n", "now + MonthEnd(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "offset = MonthEnd()\n", "offset.rollforward(now)\n", "offset.rollback(now)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts = pd.Series(np.random.randn(20),\n", " index=pd.date_range('1/15/2000', periods=20, freq='4d'))\n", "ts\n", "ts.groupby(offset.rollforward).mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.resample('M').mean()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Time Zone Handling" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import pytz\n", "pytz.common_timezones[-5:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "tz = pytz.timezone('America/New_York')\n", "tz" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Time Zone Localization and Conversion" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')\n", "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "print(ts.index.tz)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts\n", "ts_utc = ts.tz_localize('UTC')\n", "ts_utc\n", "ts_utc.index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts_utc.tz_convert('America/New_York')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts_eastern = ts.tz_localize('America/New_York')\n", "ts_eastern.tz_convert('UTC')\n", "ts_eastern.tz_convert('Europe/Berlin')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.index.tz_localize('Asia/Shanghai')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Operations with Time Zone−Aware Timestamp Objects" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "stamp = pd.Timestamp('2011-03-12 04:00')\n", "stamp_utc = stamp.tz_localize('utc')\n", "stamp_utc.tz_convert('America/New_York')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')\n", "stamp_moscow" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "stamp_utc.value\n", "stamp_utc.tz_convert('America/New_York').value" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from pandas.tseries.offsets import Hour\n", "stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')\n", "stamp\n", "stamp + Hour()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')\n", "stamp\n", "stamp + 2 * Hour()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Operations Between Different Time Zones" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')\n", "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n", "ts\n", "ts1 = ts[:7].tz_localize('Europe/London')\n", "ts2 = ts1[2:].tz_convert('Europe/Moscow')\n", "result = ts1 + ts2\n", "result.index" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Periods and Period Arithmetic" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "p = pd.Period(2007, freq='A-DEC')\n", "p" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "p + 5\n", "p - 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.Period('2014', freq='A-DEC') - p" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "rng = pd.period_range('2000-01-01', '2000-06-30', freq='M')\n", "rng" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.Series(np.random.randn(6), index=rng)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "values = ['2001Q3', '2002Q2', '2003Q1']\n", "index = pd.PeriodIndex(values, freq='Q-DEC')\n", "index" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Period Frequency Conversion" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "p = pd.Period('2007', freq='A-DEC')\n", "p\n", "p.asfreq('M', how='start')\n", "p.asfreq('M', how='end')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "p = pd.Period('2007', freq='A-JUN')\n", "p\n", "p.asfreq('M', 'start')\n", "p.asfreq('M', 'end')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "p = pd.Period('Aug-2007', 'M')\n", "p.asfreq('A-JUN')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "rng = pd.period_range('2006', '2009', freq='A-DEC')\n", "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n", "ts\n", "ts.asfreq('M', how='start')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.asfreq('B', how='end')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Quarterly Period Frequencies" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "p = pd.Period('2012Q4', freq='Q-JAN')\n", "p" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "p.asfreq('D', 'start')\n", "p.asfreq('D', 'end')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n", "p4pm\n", "p4pm.to_timestamp()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')\n", "ts = pd.Series(np.arange(len(rng)), index=rng)\n", "ts\n", "new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n", "ts.index = new_rng.to_timestamp()\n", "ts" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Converting Timestamps to Periods (and Back)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "rng = pd.date_range('2000-01-01', periods=3, freq='M')\n", "ts = pd.Series(np.random.randn(3), index=rng)\n", "ts\n", "pts = ts.to_period()\n", "pts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "rng = pd.date_range('1/29/2000', periods=6, freq='D')\n", "ts2 = pd.Series(np.random.randn(6), index=rng)\n", "ts2\n", "ts2.to_period('M')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pts = ts2.to_period()\n", "pts\n", "pts.to_timestamp(how='end')" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Creating a PeriodIndex from Arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "data = pd.read_csv('examples/macrodata.csv')\n", "data.head(5)\n", "data.year\n", "data.quarter" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "index = pd.PeriodIndex(year=data.year, quarter=data.quarter,\n", " freq='Q-DEC')\n", "index\n", "data.index = index\n", "data.infl" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Resampling and Frequency Conversion" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "rng = pd.date_range('2000-01-01', periods=100, freq='D')\n", "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n", "ts\n", "ts.resample('M').mean()\n", "ts.resample('M', kind='period').mean()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Downsampling" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "rng = pd.date_range('2000-01-01', periods=12, freq='T')\n", "ts = pd.Series(np.arange(12), index=rng)\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.resample('5min', closed='right').sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.resample('5min', closed='right').sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.resample('5min', closed='right', label='right').sum()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.resample('5min', closed='right',\n", " label='right', loffset='-1s').sum()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "#### Open-High-Low-Close (OHLC) resampling" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "ts.resample('5min').ohlc()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Upsampling and Interpolation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "frame = pd.DataFrame(np.random.randn(2, 4),\n", " index=pd.date_range('1/1/2000', periods=2,\n", " freq='W-WED'),\n", " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", "frame" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "df_daily = frame.resample('D').asfreq()\n", "df_daily" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "frame.resample('D').ffill()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "frame.resample('D').ffill(limit=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "frame.resample('W-THU').ffill()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Resampling with Periods" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "frame = pd.DataFrame(np.random.randn(24, 4),\n", " index=pd.period_range('1-2000', '12-2001',\n", " freq='M'),\n", " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", "frame[:5]\n", "annual_frame = frame.resample('A-DEC').mean()\n", "annual_frame" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "# Q-DEC: Quarterly, year ending in December\n", "annual_frame.resample('Q-DEC').ffill()\n", "annual_frame.resample('Q-DEC', convention='end').ffill()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "annual_frame.resample('Q-MAR').ffill()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Moving Window Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "close_px_all = pd.read_csv('examples/stock_px_2.csv',\n", " parse_dates=True, index_col=0)\n", "close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]\n", "close_px = close_px.resample('B').ffill()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "close_px.AAPL.plot()\n", "close_px.AAPL.rolling(250).mean().plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "appl_std250 = close_px.AAPL.rolling(250, min_periods=10).std()\n", "appl_std250[5:12]\n", "appl_std250.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "expanding_mean = appl_std250.expanding().mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "close_px.rolling(60).mean().plot(logy=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "close_px.rolling('20D').mean()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Exponentially Weighted Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "aapl_px = close_px.AAPL['2006':'2007']\n", "ma60 = aapl_px.rolling(30, min_periods=20).mean()\n", "ewma60 = aapl_px.ewm(span=30).mean()\n", "ma60.plot(style='k--', label='Simple MA')\n", "ewma60.plot(style='k-', label='EW MA')\n", "plt.legend()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### Binary Moving Window Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "spx_px = close_px_all['SPX']\n", "spx_rets = spx_px.pct_change()\n", "returns = close_px.pct_change()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "corr = returns.AAPL.rolling(125, min_periods=100).corr(spx_rets)\n", "corr.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "corr = returns.rolling(125, min_periods=100).corr(spx_rets)\n", "corr.plot()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "### User-Defined Moving Window Functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "plt.figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from scipy.stats import percentileofscore\n", "score_at_2percent = lambda x: percentileofscore(x, 0.02)\n", "result = returns.AAPL.rolling(250).apply(score_at_2percent)\n", "result.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "pd.options.display.max_rows = PREVIOUS_MAX_ROWS" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "## Conclusion" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 0 }