{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Chapter 6: Feature Selection ##" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from __future__ import division\n", "import pandas as pd\n", "import numpy as np\n", "import scipy as sp\n", "import matplotlib.pyplot as plt\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.linear_model import Ridge\n", "from sklearn.linear_model import Lasso\n", "from sklearn.feature_selection import SelectKBest\n", "from sklearn.feature_selection import f_regression\n", "from sklearn.metrics import mean_squared_error\n", "# NOTE(review): sklearn.cross_validation was removed in scikit-learn 0.20; KFold now lives in\n", "# sklearn.model_selection with a different constructor. Left unchanged here because later\n", "# cells may depend on the old KFold API -- confirm before migrating.\n", "from sklearn.cross_validation import KFold\n", "%matplotlib inline" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "# Read the Hitters CSV (relative path) and drop every row that has a missing value.\n", "# dropna() returns a new frame, so the cleaned data is bound to hitters_df in one step\n", "# without mutating an intermediate frame in place.\n", "hitters_df = pd.read_csv(\"../data/Hitters.csv\").dropna()\n", "# Last expression: show the first rows as the cell's output.\n", "hitters_df.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", " | AtBat | \n", "Hits | \n", "HmRun | \n", "Runs | \n", "RBI | \n", "Walks | \n", "Years | \n", "CAtBat | \n", "CHits | \n", "CHmRun | \n", "CRuns | \n", "CRBI | \n", "CWalks | \n", "League | \n", "Division | \n", "PutOuts | \n", "Assists | \n", "Errors | \n", "Salary | \n", "NewLeague | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "315 | \n", "81 | \n", "7 | \n", "24 | \n", "38 | \n", "39 | \n", "14 | \n", "3449 | \n", "835 | \n", "69 | \n", "321 | \n", "414 | \n", "375 | \n", "N | \n", "W | \n", "632 | \n", "43 | \n", "10 | \n", "475.0 | \n", "N | \n", "
2 | \n", "479 | \n", "130 | \n", "18 | \n", "66 | \n", "72 | \n", "76 | \n", "3 | \n", "1624 | \n", "457 | \n", "63 | \n", "224 | \n", "266 | \n", "263 | \n", "A | \n", "W | \n", "880 | \n", "82 | \n", "14 | \n", "480.0 | \n", "A | \n", "
3 | \n", "496 | \n", "141 | \n", "20 | \n", "65 | \n", "78 | \n", "37 | \n", "11 | \n", "5628 | \n", "1575 | \n", "225 | \n", "828 | \n", "838 | \n", "354 | \n", "N | \n", "E | \n", "200 | \n", "11 | \n", "3 | \n", "500.0 | \n", "N | \n", "
4 | \n", "321 | \n", "87 | \n", "10 | \n", "39 | \n", "42 | \n", "30 | \n", "2 | \n", "396 | \n", "101 | \n", "12 | \n", "48 | \n", "46 | \n", "33 | \n", "N | \n", "E | \n", "805 | \n", "40 | \n", "4 | \n", "91.5 | \n", "N | \n", "
5 | \n", "594 | \n", "169 | \n", "4 | \n", "74 | \n", "51 | \n", "35 | \n", "11 | \n", "4408 | \n", "1133 | \n", "19 | \n", "501 | \n", "336 | \n", "194 | \n", "A | \n", "W | \n", "282 | \n", "421 | \n", "25 | \n", "750.0 | \n", "A | \n", "
5 rows \u00d7 20 columns
\n", "\n", " | AtBat | \n", "Hits | \n", "HmRun | \n", "Runs | \n", "RBI | \n", "Walks | \n", "Years | \n", "CAtBat | \n", "CHits | \n", "CHmRun | \n", "CRuns | \n", "CRBI | \n", "CWalks | \n", "League | \n", "Division | \n", "PutOuts | \n", "Assists | \n", "Errors | \n", "Salary | \n", "NewLeague | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "315 | \n", "81 | \n", "7 | \n", "24 | \n", "38 | \n", "39 | \n", "14 | \n", "3449 | \n", "835 | \n", "69 | \n", "321 | \n", "414 | \n", "375 | \n", "0 | \n", "0 | \n", "632 | \n", "43 | \n", "10 | \n", "475.0 | \n", "0 | \n", "
2 | \n", "479 | \n", "130 | \n", "18 | \n", "66 | \n", "72 | \n", "76 | \n", "3 | \n", "1624 | \n", "457 | \n", "63 | \n", "224 | \n", "266 | \n", "263 | \n", "1 | \n", "0 | \n", "880 | \n", "82 | \n", "14 | \n", "480.0 | \n", "1 | \n", "
3 | \n", "496 | \n", "141 | \n", "20 | \n", "65 | \n", "78 | \n", "37 | \n", "11 | \n", "5628 | \n", "1575 | \n", "225 | \n", "828 | \n", "838 | \n", "354 | \n", "0 | \n", "1 | \n", "200 | \n", "11 | \n", "3 | \n", "500.0 | \n", "0 | \n", "
4 | \n", "321 | \n", "87 | \n", "10 | \n", "39 | \n", "42 | \n", "30 | \n", "2 | \n", "396 | \n", "101 | \n", "12 | \n", "48 | \n", "46 | \n", "33 | \n", "0 | \n", "1 | \n", "805 | \n", "40 | \n", "4 | \n", "91.5 | \n", "0 | \n", "
5 | \n", "594 | \n", "169 | \n", "4 | \n", "74 | \n", "51 | \n", "35 | \n", "11 | \n", "4408 | \n", "1133 | \n", "19 | \n", "501 | \n", "336 | \n", "194 | \n", "1 | \n", "0 | \n", "282 | \n", "421 | \n", "25 | \n", "750.0 | \n", "1 | \n", "
5 rows \u00d7 20 columns
\n", "