{ "cells": [ { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "# Chapter 7 - Moving Beyond Linearity" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "- [Lab 7.8.1 Polynomial Regression and Step Functions](#lab-7.8.1)\n", "- [Lab 7.8.2 Splines](#lab-7.8.2)\n", "- [Lab 7.8.3 GAMs](#lab-7.8.3)" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "### Imports and Configurations" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "# Standard imports\n", "import warnings\n", "\n", "# Use rpy2 for loading R datasets\n", "from rpy2.robjects.packages import importr\n", "from rpy2.robjects.packages import data as rdata\n", "from rpy2.robjects import pandas2ri\n", "\n", "# Math and data processing\n", "import numpy as np\n", "import scipy as sp\n", "import pandas as pd\n", "\n", "# StatsModels\n", "import statsmodels.api as sm\n", "import statsmodels.formula.api as smf\n", "from statsmodels.stats.anova import anova_lm\n", "from patsy import dmatrix\n", "\n", "# scikit-learn\n", "from sklearn.preprocessing import PolynomialFeatures\n", "\n", "# Visualization\n", "from IPython.display import display\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "mpl.style.use('ggplot')" ] }, { "cell_type": "markdown", "metadata": { "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "source": [ "**Load wage data set from R ISLR library.**" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "autoscroll": "json-false", "collapsed": false, "ein.tags": [ "worksheet-0" ], "slideshow": { "slide_type": "-" } }, "outputs": [ { "data": { 
"text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearagesexmaritlraceeducationregionjobclasshealthhealth_inslogwagewage
2316552006181. Male1. Never Married1. White1. < HS Grad2. Middle Atlantic1. Industrial1. <=Good2. No4.31806375.043154
865822004241. Male1. Never Married1. White4. College Grad2. Middle Atlantic2. Information2. >=Very Good2. No4.25527370.476020
1613002003451. Male2. Married1. White3. Some College2. Middle Atlantic1. Industrial1. <=Good1. Yes4.875061130.982177
1551592003431. Male2. Married3. Asian4. College Grad2. Middle Atlantic2. Information2. >=Very Good1. Yes5.041393154.685293
114432005501. Male4. Divorced1. White2. HS Grad2. Middle Atlantic2. Information1. <=Good1. Yes4.31806375.043154
\n", "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
df_residssrdf_diffss_diffFPr(>F)
02998.05.022216e+060.0NaNNaNNaN
12997.04.793430e+061.0228786.010128143.5931072.363850e-32
22996.04.777674e+061.015755.6936649.8887561.679202e-03
32995.04.771604e+061.06070.1521243.8098135.104620e-02
42994.04.770322e+061.01282.5630170.8049763.696820e-01
\n", "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
(Intercept)(33.5, 49](49, 64.5](64.5, 80]
2316551.00.00.00.0
865821.00.00.00.0
1613001.01.00.00.0
1551591.01.00.00.0
114431.00.01.00.0
3766621.00.01.00.0
4506011.01.00.00.0
3779541.00.00.00.0
2289631.01.00.00.0
814041.00.01.00.0
3027781.01.00.00.0
3057061.01.00.00.0
86901.01.00.00.0
1535611.01.00.00.0
4496541.00.01.00.0
4476601.00.01.00.0
1601911.01.00.00.0
2303121.00.01.00.0
3015851.00.01.00.0
1536821.01.00.00.0
1582261.01.00.00.0
111411.01.00.00.0
4484101.00.00.01.0
3051161.01.00.00.0
2330021.01.00.00.0
\n", "