{ "metadata": { "name": "", "signature": "sha256:5b05396fa56ec58d503b5038f250184417df9fafd2972494b8f3ec4729eb07c0" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Weighted Least Squares" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from __future__ import print_function\n", "import numpy as np\n", "from scipy import stats\n", "import statsmodels.api as sm\n", "import matplotlib.pyplot as plt\n", "from statsmodels.sandbox.regression.predstd import wls_prediction_std\n", "from statsmodels.iolib.table import (SimpleTable, default_txt_fmt)\n", "np.random.seed(1024)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## WLS Estimation\n", "\n", "### Artificial data: Heteroscedasticity 2 groups \n", "\n", "Model assumptions:\n", "\n", " * Misspecification: true model is quadratic, estimate only linear\n", " * Independent noise/error term\n", " * Two groups for error variance, low and high variance groups" ] }, { "cell_type": "code", "collapsed": false, "input": [ "nsample = 50\n", "x = np.linspace(0, 20, nsample)\n", "X = np.column_stack((x, (x - 5)**2))\n", "X = sm.add_constant(X)\n", "beta = [5., 0.5, -0.01]\n", "sig = 0.5\n", "w = np.ones(nsample)\n", "w[nsample * 6/10:] = 3\n", "y_true = np.dot(X, beta)\n", "e = np.random.normal(size=nsample)\n", "y = y_true + sig * w * e \n", "X = X[:,[0,1]]" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### WLS knowing the true variance ratio of heteroscedasticity" ] }, { "cell_type": "code", "collapsed": false, "input": [ "mod_wls = sm.WLS(y, X, weights=1./w)\n", "res_wls = mod_wls.fit()\n", "print(res_wls.summary())" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## OLS vs. WLS\n", "\n", "Estimate an OLS model for comparison: " ] }, { "cell_type": "code", "collapsed": false, "input": [ "res_ols = sm.OLS(y, X).fit()\n", "print(res_ols.params)\n", "print(res_wls.params)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Compare the WLS standard errors to heteroscedasticity corrected OLS standard errors:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "se = np.vstack([[res_wls.bse], [res_ols.bse], [res_ols.HC0_se], \n", " [res_ols.HC1_se], [res_ols.HC2_se], [res_ols.HC3_se]])\n", "se = np.round(se,4)\n", "colnames = ['x1', 'const']\n", "rownames = ['WLS', 'OLS', 'OLS_HC0', 'OLS_HC1', 'OLS_HC3', 'OLS_HC3']\n", "tabl = SimpleTable(se, colnames, rownames, txt_fmt=default_txt_fmt)\n", "print(tabl)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Calculate OLS prediction interval:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "covb = res_ols.cov_params()\n", "prediction_var = res_ols.mse_resid + (X * np.dot(covb,X.T).T).sum(1)\n", "prediction_std = np.sqrt(prediction_var)\n", "tppf = stats.t.ppf(0.975, res_ols.df_resid)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "code", "collapsed": false, "input": [ "prstd_ols, iv_l_ols, iv_u_ols = wls_prediction_std(res_ols)" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Draw a plot to compare predicted values in WLS and OLS:" ] }, { "cell_type": "code", "collapsed": false, "input": [ "prstd, iv_l, iv_u = wls_prediction_std(res_wls)\n", "\n", "fig, ax = plt.subplots(figsize=(8,6))\n", "ax.plot(x, y, 'o', label=\"Data\")\n", "ax.plot(x, y_true, 'b-', label=\"True\")\n", "# OLS\n", "ax.plot(x, res_ols.fittedvalues, 'r--')\n", "ax.plot(x, iv_u_ols, 'r--', label=\"OLS\")\n", "ax.plot(x, iv_l_ols, 'r--')\n", "# WLS\n", "ax.plot(x, res_wls.fittedvalues, 'g--.')\n", "ax.plot(x, iv_u, 'g--', label=\"WLS\")\n", "ax.plot(x, iv_l, 'g--')\n", "ax.legend(loc=\"best\");" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Feasible Weighted Least Squares (2-stage FWLS)" ] }, { "cell_type": "code", "collapsed": false, "input": [ "resid1 = res_ols.resid[w==1.]\n", "var1 = resid1.var(ddof=int(res_ols.df_model)+1)\n", "resid2 = res_ols.resid[w!=1.]\n", "var2 = resid2.var(ddof=int(res_ols.df_model)+1)\n", "w_est = w.copy()\n", "w_est[w!=1.] = np.sqrt(var2) / np.sqrt(var1)\n", "res_fwls = sm.WLS(y, X, 1./w_est).fit()\n", "print(res_fwls.summary())" ], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }