{ "metadata": { "name": "", "signature": "sha256:9136438335b909aad7f511cb3e18b8ad11b64630b98d8f96c556ee7ddabcf76b" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Autoregressive Moving Average (ARMA): Sunspots data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook replicates the existing ARMA notebook using the `statsmodels.tsa.statespace.SARIMAX` class rather than the `statsmodels.tsa.ARMA` class." ] }, { "cell_type": "code", "collapsed": false, "input": [ "%matplotlib inline" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "from __future__ import print_function\n", "import numpy as np\n", "from scipy import stats\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "import statsmodels.api as sm" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "from statsmodels.graphics.api import qqplot" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Sunpots Data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "print(sm.datasets.sunspots.NOTE)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "dta = sm.datasets.sunspots.load_pandas().data" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))\n", "del dta[\"YEAR\"]" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "dta.plot(figsize=(12,4));" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,8))\n", "ax1 = fig.add_subplot(211)\n", "fig = sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40, ax=ax1)\n", "ax2 = fig.add_subplot(212)\n", "fig = sm.graphics.tsa.plot_pacf(dta, lags=40, ax=ax2)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "arma_mod20 = sm.tsa.statespace.SARIMAX(dta, order=(2,0,0), trend='c').fit()\n", "print(arma_mod20.params)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "arma_mod30 = sm.tsa.statespace.SARIMAX(dta, order=(3,0,0), trend='c').fit()" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "print(arma_mod20.aic, arma_mod20.bic, arma_mod20.hqic)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "print(arma_mod30.params)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "print(arma_mod30.aic, arma_mod30.bic, arma_mod30.hqic)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* Does our model obey the theory?" ] }, { "cell_type": "code", "collapsed": false, "input": [ "sm.stats.durbin_watson(arma_mod30.resid()[0])" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,4))\n", "ax = fig.add_subplot(111)\n", "ax = plt.plot(arma_mod30.resid()[0])" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "resid = arma_mod30.resid()[0]" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "stats.normaltest(resid)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,4))\n", "ax = fig.add_subplot(111)\n", "fig = qqplot(resid, line='q', ax=ax, fit=True)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "fig = plt.figure(figsize=(12,8))\n", "ax1 = fig.add_subplot(211)\n", "fig = sm.graphics.tsa.plot_acf(resid, lags=40, ax=ax1)\n", "ax2 = fig.add_subplot(212)\n", "fig = sm.graphics.tsa.plot_pacf(resid, lags=40, ax=ax2)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "r,q,p = sm.tsa.acf(resid, qstat=True)\n", "data = np.c_[range(1,41), r[1:], q, p]\n", "table = pd.DataFrame(data, columns=['lag', \"AC\", \"Q\", \"Prob(>Q)\"])\n", "print(table.set_index('lag'))" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* This indicates a lack of fit." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* In-sample dynamic prediction. How good does our model do?" ] }, { "cell_type": "code", "collapsed": false, "input": [ "predict_sunspots = arma_mod30.predict(start='1990', end='2012', dynamic=True)\n", "index = pd.date_range('1990','2013',freq='A')" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "fig, ax = plt.subplots(figsize=(12, 8))\n", "ax.plot(dta.ix['1950':].index._mpl_repr(), dta.ix['1950':])\n", "ax.plot(index, predict_sunspots[0], 'r');" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "def mean_forecast_err(y, yhat):\n", " return y.sub(yhat).mean()" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "mean_forecast_err(dta.SUNACTIVITY, pd.Series(predict_sunspots[0], index=index))" ], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }