{
 "metadata": {
  "name": "",
  "signature": "sha256:3d56dcbfbf79a5d9735e5240f044566b3157072f90f085a06c84b6ee3e94ae70"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Prediction (out of sample)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from __future__ import print_function\n",
      "import numpy as np\n",
      "import statsmodels.api as sm"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Artificial data"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "nsample = 50\n",
      "sig = 0.25\n",
      "x1 = np.linspace(0, 20, nsample)\n",
      "X = np.column_stack((x1, np.sin(x1), (x1-5)**2))\n",
      "X = sm.add_constant(X)\n",
      "beta = [5., 0.5, 0.5, -0.02]\n",
      "y_true = np.dot(X, beta)\n",
      "y = y_true + sig * np.random.normal(size=nsample)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Estimation "
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "olsmod = sm.OLS(y, X)\n",
      "olsres = olsmod.fit()\n",
      "print(olsres.summary())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## In-sample prediction"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "ypred = olsres.predict(X)\n",
      "print(ypred)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Create a new sample of explanatory variables Xnew, predict and plot"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "x1n = np.linspace(20.5,25, 10)\n",
      "Xnew = np.column_stack((x1n, np.sin(x1n), (x1n-5)**2))\n",
      "Xnew = sm.add_constant(Xnew)\n",
      "ynewpred =  olsres.predict(Xnew) # predict out of sample\n",
      "print(ynewpred)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Plot comparison"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import matplotlib.pyplot as plt\n",
      "\n",
      "fig, ax = plt.subplots()\n",
      "ax.plot(x1, y, 'o', label=\"Data\")\n",
      "ax.plot(x1, y_true, 'b-', label=\"True\")\n",
      "ax.plot(np.hstack((x1, x1n)), np.hstack((ypred, ynewpred)), 'r', label=\"OLS prediction\")\n",
      "ax.legend(loc=\"best\");"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Predicting with Formulas"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Using formulas can make both estimation and prediction a lot easier"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "from statsmodels.formula.api import ols\n",
      "\n",
      "data = {\"x1\" : x1, \"y\" : y}\n",
      "\n",
      "res = ols(\"y ~ x1 + np.sin(x1) + I((x1-5)**2)\", data=data).fit()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "We use the `I` to indicate use of the Identity transform. Ie., we don't want any expansion magic from using `**2`"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "res.params"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Now we only have to pass the single variable and we get the transformed right-hand side variables automatically"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "res.predict(exog=dict(x1=x1n))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}