{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Chapter 7: Nonlinear Models ###" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from __future__ import division\n", "import pandas as pd\n", "import numpy as np\n", "import scipy as sp\n", "import matplotlib.pyplot as plt\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.metrics import mean_squared_error\n", "from sklearn.feature_selection import SelectKBest\n", "from sklearn.feature_selection import f_regression\n", "from sklearn.feature_selection import chi2\n", "from sklearn.cluster import MeanShift\n", "from scipy.interpolate import LSQUnivariateSpline\n", "%matplotlib inline" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "# Data comes from ISLR::Wage - exported from R using write.csv(Wage, \"Wage.csv\", row.names=FALSE)\n", "wage_df = pd.read_csv(\"../data/Wage.csv\")\n", "wage_df.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", " | year | \n", "age | \n", "sex | \n", "maritl | \n", "race | \n", "education | \n", "region | \n", "jobclass | \n", "health | \n", "health_ins | \n", "logwage | \n", "wage | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2006 | \n", "18 | \n", "1. Male | \n", "1. Never Married | \n", "1. White | \n", "1. < HS Grad | \n", "2. Middle Atlantic | \n", "1. Industrial | \n", "1. <=Good | \n", "2. No | \n", "4.318063 | \n", "75.043154 | \n", "
1 | \n", "2004 | \n", "24 | \n", "1. Male | \n", "1. Never Married | \n", "1. White | \n", "4. College Grad | \n", "2. Middle Atlantic | \n", "2. Information | \n", "2. >=Very Good | \n", "2. No | \n", "4.255273 | \n", "70.476020 | \n", "
2 | \n", "2003 | \n", "45 | \n", "1. Male | \n", "2. Married | \n", "1. White | \n", "3. Some College | \n", "2. Middle Atlantic | \n", "1. Industrial | \n", "1. <=Good | \n", "1. Yes | \n", "4.875061 | \n", "130.982177 | \n", "
3 | \n", "2003 | \n", "43 | \n", "1. Male | \n", "2. Married | \n", "3. Asian | \n", "4. College Grad | \n", "2. Middle Atlantic | \n", "2. Information | \n", "2. >=Very Good | \n", "1. Yes | \n", "5.041393 | \n", "154.685293 | \n", "
4 | \n", "2005 | \n", "50 | \n", "1. Male | \n", "4. Divorced | \n", "1. White | \n", "2. HS Grad | \n", "2. Middle Atlantic | \n", "2. Information | \n", "1. <=Good | \n", "1. Yes | \n", "4.318063 | \n", "75.043154 | \n", "
5 rows \u00d7 12 columns
\n", "\n", " | year | \n", "age | \n", "sex | \n", "maritl | \n", "race | \n", "education | \n", "region | \n", "jobclass | \n", "health | \n", "health_ins | \n", "logwage | \n", "wage | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2006 | \n", "18 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1. Industrial | \n", "0 | \n", "0 | \n", "4.318063 | \n", "75.043154 | \n", "
1 | \n", "2004 | \n", "24 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "2. Information | \n", "1 | \n", "0 | \n", "4.255273 | \n", "70.476020 | \n", "
2 | \n", "2003 | \n", "45 | \n", "0 | \n", "1 | \n", "0 | \n", "2 | \n", "0 | \n", "1. Industrial | \n", "0 | \n", "1 | \n", "4.875061 | \n", "130.982177 | \n", "
3 | \n", "2003 | \n", "43 | \n", "0 | \n", "1 | \n", "1 | \n", "1 | \n", "0 | \n", "2. Information | \n", "1 | \n", "1 | \n", "5.041393 | \n", "154.685293 | \n", "
4 | \n", "2005 | \n", "50 | \n", "0 | \n", "2 | \n", "0 | \n", "3 | \n", "0 | \n", "2. Information | \n", "0 | \n", "1 | \n", "4.318063 | \n", "75.043154 | \n", "
5 rows \u00d7 12 columns
\n", "