{ "metadata": { "name": "", "signature": "sha256:d2d9d9999df0d82e05a34bd3e5e4f35dd2e89adfdd7af344e29965ceb7cefa2b" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "# load numpy and pandas for data manipulation\n", "import numpy as np\n", "import pandas as pd\n", "\n", "# load statsmodels as alias ``sm``\n", "import statsmodels.api as sm\n", "\n", "# load the longley dataset into a pandas data frame - first column (year) used as row labels\n", "df = pd.read_csv('http://vincentarelbundock.github.io/Rdatasets/csv/datasets/longley.csv', index_col=0)\n", "df.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GNP.deflatorGNPUnemployedArmed.ForcesPopulationYearEmployed
1947 83.0 234.289 235.6 159.0 107.608 1947 60.323
1948 88.5 259.426 232.5 145.6 108.632 1948 61.122
1949 88.2 258.054 368.2 161.6 109.773 1949 60.171
1950 89.5 284.599 335.1 165.0 110.929 1950 61.187
1951 96.2 328.975 209.9 309.9 112.075 1951 63.221
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ " GNP.deflator GNP Unemployed Armed.Forces Population Year \\\n", "1947 83.0 234.289 235.6 159.0 107.608 1947 \n", "1948 88.5 259.426 232.5 145.6 108.632 1948 \n", "1949 88.2 258.054 368.2 161.6 109.773 1949 \n", "1950 89.5 284.599 335.1 165.0 110.929 1950 \n", "1951 96.2 328.975 209.9 309.9 112.075 1951 \n", "\n", " Employed \n", "1947 60.323 \n", "1948 61.122 \n", "1949 60.171 \n", "1950 61.187 \n", "1951 63.221 " ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "import statsmodels.formula.api as smf\n", "# formula: response ~ predictors\n", "est = smf.ols(formula='Employed ~ GNP + Population + Year', data=df).fit()\n", "est.summary()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: Employed R-squared: 0.979
Model: OLS Adj. R-squared: 0.974
Method: Least Squares F-statistic: 190.1
Date: Thu, 22 Jan 2015 Prob (F-statistic): 2.22e-10
Time: 11:44:49 Log-Likelihood: -11.227
No. Observations: 16 AIC: 30.45
Df Residuals: 12 BIC: 33.55
Df Model: 3
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 416.9465 740.264 0.563 0.584 -1195.950 2029.843
GNP 0.0679 0.015 4.436 0.001 0.035 0.101
Population -0.3597 0.193 -1.860 0.088 -0.781 0.062
Year -0.1718 0.388 -0.443 0.666 -1.016 0.673
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 1.348 Durbin-Watson: 1.219
Prob(Omnibus): 0.510 Jarque-Bera (JB): 0.640
Skew: 0.489 Prob(JB): 0.726
Kurtosis: 2.934 Cond. No. 1.05e+07
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 9, "text": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: Employed R-squared: 0.979\n", "Model: OLS Adj. R-squared: 0.974\n", "Method: Least Squares F-statistic: 190.1\n", "Date: Thu, 22 Jan 2015 Prob (F-statistic): 2.22e-10\n", "Time: 11:44:49 Log-Likelihood: -11.227\n", "No. Observations: 16 AIC: 30.45\n", "Df Residuals: 12 BIC: 33.55\n", "Df Model: 3 \n", "==============================================================================\n", " coef std err t P>|t| [95.0% Conf. Int.]\n", "------------------------------------------------------------------------------\n", "Intercept 416.9465 740.264 0.563 0.584 -1195.950 2029.843\n", "GNP 0.0679 0.015 4.436 0.001 0.035 0.101\n", "Population -0.3597 0.193 -1.860 0.088 -0.781 0.062\n", "Year -0.1718 0.388 -0.443 0.666 -1.016 0.673\n", "==============================================================================\n", "Omnibus: 1.348 Durbin-Watson: 1.219\n", "Prob(Omnibus): 0.510 Jarque-Bera (JB): 0.640\n", "Skew: 0.489 Prob(JB): 0.726\n", "Kurtosis: 2.934 Cond. No. 1.05e+07\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] The condition number is large, 1.05e+07. This might indicate that there are\n", "strong multicollinearity or other numerical problems.\n", "\"\"\"" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }