{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Time series" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import division\n", "from pandas import Series, DataFrame\n", "import pandas as pd\n", "from numpy.random import randn\n", "import numpy as np\n", "pd.options.display.max_rows = 12\n", "np.set_printoptions(precision=4, suppress=True)\n", "import matplotlib.pyplot as plt\n", "plt.rc('figure', figsize=(12, 4))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Date and Time Data Types and Tools" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from datetime import datetime\n", "now = datetime.now()\n", "now" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "now.year, now.month, now.day" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "delta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)\n", "delta" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "delta.days" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "delta.seconds" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from datetime import timedelta\n", "start = datetime(2011, 1, 7)\n", "start + timedelta(12)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "start - 2 * timedelta(12)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Converting between string and datetime" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "stamp = datetime(2011, 1, 3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "str(stamp)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "stamp.strftime('%Y-%m-%d')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "value = '2011-01-03'\n", "datetime.strptime(value, '%Y-%m-%d')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "datestrs = ['7/6/2011', '8/6/2011']\n", "[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from dateutil.parser import parse\n", "parse('2011-01-03')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "parse('Jan 31, 1997 10:45 PM')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "parse('6/12/2011', dayfirst=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "datestrs" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.to_datetime(datestrs)\n", "# note: output changed (no '00:00:00' anymore)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "idx = pd.to_datetime(datestrs + [None])\n", "idx" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "idx[2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.isnull(idx)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Time Series Basics" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from datetime import datetime\n", "dates = [datetime(2011, 1, 2), datetime(2011, 1, 5), datetime(2011, 1, 7),\n", " datetime(2011, 1, 8), datetime(2011, 1, 10), datetime(2011, 1, 12)]\n", "ts = Series(np.random.randn(6), index=dates)\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "type(ts)\n", "# note: output changed to \"pandas.core.series.Series\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts + ts[::2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.index.dtype\n", "# note: output changed from dtype('datetime64[ns]') to dtype(' to Timestamp('2011-01-02 00:00:00')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Indexing, selection, subsetting" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "stamp = ts.index[2]\n", "ts[stamp]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts['1/10/2011']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts['20110110']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "longer_ts = Series(np.random.randn(1000),\n", " index=pd.date_range('1/1/2000', periods=1000))\n", "longer_ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "longer_ts['2001']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "longer_ts['2001-05']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts[datetime(2011, 1, 7):]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts['1/6/2011':'1/11/2011']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.truncate(after='1/9/2011')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')\n", "long_df = DataFrame(np.random.randn(100, 4),\n", " index=dates,\n", " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", "long_df.ix['5-2001']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Time series with duplicate indices" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000',\n", " '1/3/2000'])\n", "dup_ts = Series(np.arange(5), index=dates)\n", "dup_ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dup_ts.index.is_unique" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dup_ts['1/3/2000'] # not duplicated" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dup_ts['1/2/2000'] # duplicated" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "grouped = dup_ts.groupby(level=0)\n", "grouped.mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "grouped.count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Date ranges, Frequencies, and Shifting" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.resample('D')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generating date ranges" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "index = pd.date_range('4/1/2012', '6/1/2012')\n", "index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.date_range(start='4/1/2012', periods=20)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.date_range(end='6/1/2012', periods=20)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.date_range('1/1/2000', '12/1/2000', freq='BM')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.date_range('5/2/2012 12:56:31', periods=5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.date_range('5/2/2012 12:56:31', periods=5, normalize=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Frequencies and Date Offsets" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from pandas.tseries.offsets import Hour, Minute\n", "hour = Hour()\n", "hour" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "four_hours = Hour(4)\n", "four_hours" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.date_range('1/1/2000', '1/3/2000 23:59', freq='4h')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "Hour(2) + Minute(30)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.date_range('1/1/2000', periods=10, freq='1h30min')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Week of month dates" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.date_range('1/1/2012', '9/1/2012', freq='WOM-3FRI')\n", "list(rng)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Shifting (leading and lagging) data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts = Series(np.random.randn(4),\n", " index=pd.date_range('1/1/2000', periods=4, freq='M'))\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.shift(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.shift(-2)" ] }, { "cell_type": "raw", "metadata": {}, "source": [ "ts / ts.shift(1) - 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.shift(2, freq='M')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.shift(3, freq='D')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.shift(1, freq='3D')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.shift(1, freq='90T')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Shifting dates with offsets" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from pandas.tseries.offsets import Day, MonthEnd\n", "now = datetime(2011, 11, 17)\n", "now + 3 * Day()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "now + MonthEnd()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "now + MonthEnd(2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "offset = MonthEnd()\n", "offset.rollforward(now)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "offset.rollback(now)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts = Series(np.random.randn(20),\n", " index=pd.date_range('1/15/2000', periods=20, freq='4d'))\n", "ts.groupby(offset.rollforward).mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.resample('M', how='mean')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Time Zone Handling" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pytz\n", "pytz.common_timezones[-5:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "tz = pytz.timezone('US/Eastern')\n", "tz" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Localization and Conversion" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')\n", "ts = Series(np.random.randn(len(rng)), index=rng)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "print(ts.index.tz)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts_utc = ts.tz_localize('UTC')\n", "ts_utc" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts_utc.index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts_utc.tz_convert('US/Eastern')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts_eastern = ts.tz_localize('US/Eastern')\n", "ts_eastern.tz_convert('UTC')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts_eastern.tz_convert('Europe/Berlin')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.index.tz_localize('Asia/Shanghai')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Operations with time zone-aware Timestamp objects" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "stamp = pd.Timestamp('2011-03-12 04:00')\n", "stamp_utc = stamp.tz_localize('utc')\n", "stamp_utc.tz_convert('US/Eastern')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "stamp_moscow = pd.Timestamp('2011-03-12 04:00', tz='Europe/Moscow')\n", "stamp_moscow" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "stamp_utc.value" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "stamp_utc.tz_convert('US/Eastern').value" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# 30 minutes before DST transition\n", "from pandas.tseries.offsets import Hour\n", "stamp = pd.Timestamp('2012-03-12 01:30', tz='US/Eastern')\n", "stamp" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "stamp + Hour()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# 90 minutes before DST transition\n", "stamp = pd.Timestamp('2012-11-04 00:30', tz='US/Eastern')\n", "stamp" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "stamp + 2 * Hour()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Operations between different time zones" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.date_range('3/7/2012 9:30', periods=10, freq='B')\n", "ts = Series(np.random.randn(len(rng)), index=rng)\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts1 = ts[:7].tz_localize('Europe/London')\n", "ts2 = ts1[2:].tz_convert('Europe/Moscow')\n", "result = ts1 + ts2\n", "result.index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Periods and Period Arithmetic" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p = pd.Period(2007, freq='A-DEC')\n", "p" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p + 5" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p - 2" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.Period('2014', freq='A-DEC') - p" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.period_range('1/1/2000', '6/30/2000', freq='M')\n", "rng" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "Series(np.random.randn(6), index=rng)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "values = ['2001Q3', '2002Q2', '2003Q1']\n", "index = pd.PeriodIndex(values, freq='Q-DEC')\n", "index" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Period Frequency Conversion" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p = pd.Period('2007', freq='A-DEC')\n", "p.asfreq('M', how='start')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p.asfreq('M', how='end')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p = pd.Period('2007', freq='A-JUN')\n", "p.asfreq('M', 'start')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p.asfreq('M', 'end')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p = pd.Period('Aug-2007', 'M')\n", "p.asfreq('A-JUN')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.period_range('2006', '2009', freq='A-DEC')\n", "ts = Series(np.random.randn(len(rng)), index=rng)\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.asfreq('M', how='start')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.asfreq('B', how='end')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Quarterly period frequencies" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p = pd.Period('2012Q4', freq='Q-JAN')\n", "p" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p.asfreq('D', 'start')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p.asfreq('D', 'end')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p4pm = (p.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n", "p4pm" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "p4pm.to_timestamp()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.period_range('2011Q3', '2012Q4', freq='Q-JAN')\n", "ts = Series(np.arange(len(rng)), index=rng)\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "new_rng = (rng.asfreq('B', 'e') - 1).asfreq('T', 's') + 16 * 60\n", "ts.index = new_rng.to_timestamp()\n", "ts" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Converting Timestamps to Periods (and back)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.date_range('1/1/2000', periods=3, freq='M')\n", "ts = Series(randn(3), index=rng)\n", "pts = ts.to_period()\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.date_range('1/29/2000', periods=6, freq='D')\n", "ts2 = Series(randn(6), index=rng)\n", "ts2.to_period('M')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pts = ts.to_period()\n", "pts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pts.to_timestamp(how='end')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Creating a PeriodIndex from arrays" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data = pd.read_csv('ch08/macrodata.csv')\n", "data.year" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data.quarter" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "index = pd.PeriodIndex(year=data.year, quarter=data.quarter, freq='Q-DEC')\n", "index" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "data.index = index\n", "data.infl" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Resampling and Frequency Conversion" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.date_range('1/1/2000', periods=100, freq='D')\n", "ts = Series(randn(len(rng)), index=rng)\n", "ts.resample('M', how='mean')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.resample('M', how='mean', kind='period')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Downsampling" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.date_range('1/1/2000', periods=12, freq='T')\n", "ts = Series(np.arange(12), index=rng)\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.resample('5min', how='sum')\n", "# note: output changed (as the default changed from closed='right', label='right' to closed='left', label='left'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.resample('5min', how='sum', closed='left')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.resample('5min', how='sum', closed='left', label='left')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.resample('5min', how='sum', loffset='-1s')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Open-High-Low-Close (OHLC) resampling" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.resample('5min', how='ohlc')\n", "# note: output changed because of changed defaults" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Resampling with GroupBy" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.date_range('1/1/2000', periods=100, freq='D')\n", "ts = Series(np.arange(100), index=rng)\n", "ts.groupby(lambda x: x.month).mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.groupby(lambda x: x.weekday).mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Upsampling and interpolation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "frame = DataFrame(np.random.randn(2, 4),\n", " index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),\n", " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", "frame" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df_daily = frame.resample('D')\n", "df_daily" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "frame.resample('D', fill_method='ffill')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "frame.resample('D', fill_method='ffill', limit=2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "frame.resample('W-THU', fill_method='ffill')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Resampling with periods" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "frame = DataFrame(np.random.randn(24, 4),\n", " index=pd.period_range('1-2000', '12-2001', freq='M'),\n", " columns=['Colorado', 'Texas', 'New York', 'Ohio'])\n", "frame[:5]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "annual_frame = frame.resample('A-DEC', how='mean')\n", "annual_frame" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Q-DEC: Quarterly, year ending in December\n", "annual_frame.resample('Q-DEC', fill_method='ffill')\n", "# note: output changed, default value changed from convention='end' to convention='start' + 'start' changed to span-like\n", "# also the following cells" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "annual_frame.resample('Q-DEC', fill_method='ffill', convention='start')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "annual_frame.resample('Q-MAR', fill_method='ffill')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Time series plotting" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "close_px_all = pd.read_csv('ch09/stock_px.csv', parse_dates=True, index_col=0)\n", "close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]\n", "close_px = close_px.resample('B', fill_method='ffill')\n", "close_px.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "close_px['AAPL'].plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "close_px.ix['2009'].plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "close_px['AAPL'].ix['01-2011':'03-2011'].plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "appl_q = close_px['AAPL'].resample('Q-DEC', fill_method='ffill')\n", "appl_q.ix['2009':].plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Moving window functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "close_px = close_px.asfreq('B').fillna(method='ffill')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "close_px.AAPL.plot()\n", "pd.rolling_mean(close_px.AAPL, 250).plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.figure()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "appl_std250 = pd.rolling_std(close_px.AAPL, 250, min_periods=10)\n", "appl_std250[5:12]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "appl_std250.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Define expanding mean in terms of rolling_mean\n", "expanding_mean = lambda x: rolling_mean(x, len(x), min_periods=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "pd.rolling_mean(close_px, 60).plot(logy=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.close('all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Exponentially-weighted functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True,\n", " figsize=(12, 7))\n", "\n", "aapl_px = close_px.AAPL['2005':'2009']\n", "\n", "ma60 = pd.rolling_mean(aapl_px, 60, min_periods=50)\n", "ewma60 = pd.ewma(aapl_px, span=60)\n", "\n", "aapl_px.plot(style='k-', ax=axes[0])\n", "ma60.plot(style='k--', ax=axes[0])\n", "aapl_px.plot(style='k-', ax=axes[1])\n", "ewma60.plot(style='k--', ax=axes[1])\n", "axes[0].set_title('Simple MA')\n", "axes[1].set_title('Exponentially-weighted MA')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Binary moving window functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "close_px\n", "spx_px = close_px_all['SPX']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "spx_rets = spx_px / spx_px.shift(1) - 1\n", "returns = close_px.pct_change()\n", "corr = pd.rolling_corr(returns.AAPL, spx_rets, 125, min_periods=100)\n", "corr.plot()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "corr = pd.rolling_corr(returns, spx_rets, 125, min_periods=100)\n", "corr.plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### User-defined moving window functions" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from scipy.stats import percentileofscore\n", "score_at_2percent = lambda x: percentileofscore(x, 0.02)\n", "result = pd.rolling_apply(returns.AAPL, 250, score_at_2percent)\n", "result.plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Performance and Memory Usage Notes" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.date_range('1/1/2000', periods=10000000, freq='10ms')\n", "ts = Series(np.random.randn(len(rng)), index=rng)\n", "ts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "ts.resample('15min', how='ohlc').info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%timeit ts.resample('15min', how='ohlc')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "rng = pd.date_range('1/1/2000', periods=10000000, freq='1s')\n", "ts = Series(np.random.randn(len(rng)), index=rng)\n", "%timeit ts.resample('15s', how='ohlc')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }