{ "metadata": { "name": "tsa_arma" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "ARMA example using sunpots data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import numpy as np\n", "from scipy import stats\n", "import pandas\n", "import matplotlib.pyplot as plt\n", "\n", "import statsmodels.api as sm" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "from statsmodels.graphics.api import qqplot" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "print sm.datasets.sunspots.NOTE" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "dta = sm.datasets.sunspots.load_pandas().data" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "dta.index = pandas.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))\n", "del dta[\"YEAR\"]" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "dta.plot(figsize=(12,8));" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,8))\n", "ax1 = fig.add_subplot(211)\n", "fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)\n", "ax2 = fig.add_subplot(212)\n", "fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "arma_mod20 = sm.tsa.ARMA(dta, (2,0)).fit()\n", "print arma_mod20.params" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "arma_mod30 = sm.tsa.ARMA(dta, (3,0)).fit()" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "print arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "print arma_mod30.params" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "print arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Does our model obey the theory?" ] }, { "cell_type": "code", "collapsed": false, "input": [ "sm.stats.durbin_watson(arma_mod30.resid.values)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,8))\n", "ax = fig.add_subplot(111)\n", "ax = arma_mod30.resid.plot(ax=ax);" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "resid = arma_mod30.resid" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "stats.normaltest(resid)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,8))\n", "ax = fig.add_subplot(111)\n", "fig = qqplot(resid, line='q', ax=ax, fit=True)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,8))\n", "ax1 = fig.add_subplot(211)\n", "fig = sm.graphics.tsa.plot_acf(resid.values.squeeze(), lags=40, ax=ax1)\n", "ax2 = fig.add_subplot(212)\n", "fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "r,q,p = sm.tsa.acf(resid.values.squeeze(), qstat=True)\n", "data = np.c_[range(1,41), r[1:], q, p]\n", "table = pandas.DataFrame(data, columns=['lag', \"AC\", \"Q\", \"Prob(>Q)\"])\n", "print table.set_index('lag')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* This indicates a lack of fit." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* In-sample dynamic prediction. How good does our model do?" ] }, { "cell_type": "code", "collapsed": false, "input": [ "predict_sunspots = arma_mod30.predict('1990', '2012', dynamic=True)\n", "print predict_sunspots" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "ax = dta.ix['1950':].plot(figsize=(12,8))\n", "ax = predict_sunspots.plot(ax=ax, style='r--', label='Dynamic Prediction');\n", "ax.legend();\n", "ax.axis((-20.0, 38.0, -4.0, 200.0));" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "def mean_forecast_err(y, yhat):\n", " return y.sub(yhat).mean()" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "mean_forecast_err(dta.SUNACTIVITY, predict_sunspots)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Exercise: Can you obtain a better fit for the Sunspots model? (Hint: sm.tsa.AR has a method select_order)" ] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Simulated ARMA(4,1): Model Identification is Difficult" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from statsmodels.tsa.arima_process import arma_generate_sample, ArmaProcess" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "np.random.seed(1234)\n", "# include zero-th lag\n", "arparams = np.array([1, .75, -.65, -.55, .9])\n", "maparams = np.array([1, .65])" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Let's make sure this models is estimable." ] }, { "cell_type": "code", "collapsed": false, "input": [ "arma_t = ArmaProcess(arparams, maparams)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "arma_t.isinvertible()" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "arma_t.isstationary()" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "raw", "metadata": {}, "source": [ "* What does this mean?" ] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,8))\n", "ax = fig.add_subplot(111)\n", "ax.plot(arma_t.generate_sample(size=50));" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "arparams = np.array([1, .35, -.15, .55, .1])\n", "maparams = np.array([1, .65])\n", "arma_t = ArmaProcess(arparams, maparams)\n", "arma_t.isstationary()" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "arma_rvs = arma_t.generate_sample(size=500, burnin=250, scale=2.5)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,8))\n", "ax1 = fig.add_subplot(211)\n", "fig = sm.graphics.tsa.plot_acf(arma_rvs, lags=40, ax=ax1)\n", "ax2 = fig.add_subplot(212)\n", "fig = sm.graphics.tsa.plot_pacf(arma_rvs, lags=40, ax=ax2)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "raw", "metadata": {}, "source": [ "* For mixed ARMA processes the Autocorrelation function is a mixture of exponentials and damped sine waves after (q-p) lags. \n", "* The partial autocorrelation function is a mixture of exponentials and dampened sine waves after (p-q) lags." ] }, { "cell_type": "code", "collapsed": false, "input": [ "arma11 = sm.tsa.ARMA(arma_rvs, (1,1)).fit()\n", "resid = arma11.resid\n", "r,q,p = sm.tsa.acf(resid, qstat=True)\n", "data = np.c_[range(1,41), r[1:], q, p]\n", "table = pandas.DataFrame(data, columns=['lag', \"AC\", \"Q\", \"Prob(>Q)\"])\n", "print table.set_index('lag')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "arma41 = sm.tsa.ARMA(arma_rvs, (4,1)).fit()\n", "resid = arma41.resid\n", "r,q,p = sm.tsa.acf(resid, qstat=True)\n", "data = np.c_[range(1,41), r[1:], q, p]\n", "table = pandas.DataFrame(data, columns=['lag', \"AC\", \"Q\", \"Prob(>Q)\"])\n", "print table.set_index('lag')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Exercise: How good of in-sample prediction can you do for another series, say, CPI" ] }, { "cell_type": "code", "collapsed": false, "input": [ "macrodta = sm.datasets.macrodata.load_pandas().data\n", "macrodta.index = pandas.Index(sm.tsa.datetools.dates_from_range('1959Q1', '2009Q3'))\n", "cpi = macrodta[\"cpi\"]" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "Hint: " ] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,8))\n", "ax = fig.add_subplot(111)\n", "ax = cpi.plot(ax=ax);\n", "ax.legend();" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "raw", "metadata": {}, "source": [ "P-value of the unit-root test, resoundly rejects the null of no unit-root." ] }, { "cell_type": "code", "collapsed": false, "input": [ "print sm.tsa.adfuller(cpi)[1]" ], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }