{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "26.1414211520072" ], "text/latex": [ "26.1414211520072" ], "text/markdown": [ "26.1414211520072" ], "text/plain": [ "[1] 26.14142" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "19.8225850408262" ], "text/latex": [ "19.8225850408262" ], "text/markdown": [ "19.8225850408262" ], "text/plain": [ "[1] 19.82259" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "19.7825166856023" ], "text/latex": [ "19.7825166856023" ], "text/markdown": [ "19.7825166856023" ], "text/plain": [ "[1] 19.78252" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "23.2955851508862" ], "text/latex": [ "23.2955851508862" ], "text/markdown": [ "23.2955851508862" ], "text/plain": [ "[1] 23.29559" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "18.9012408317778" ], "text/latex": [ "18.9012408317778" ], "text/markdown": [ "18.9012408317778" ], "text/plain": [ "[1] 18.90124" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "19.2573982608642" ], "text/latex": [ "19.2573982608642" ], "text/markdown": [ "19.2573982608642" ], "text/plain": [ "[1] 19.2574" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# The Validation Set Approach\n",
 "#\n",
 "# Randomly split Auto into two halves, fit polynomial regressions of mpg on\n",
 "# horsepower (degree 1 to 3) on the training half, and report the mean\n",
 "# squared error on the held-out half. The experiment is run for two seeds to\n",
 "# show how much the estimate depends on the particular split.\n",
 "\n",
 "library(ISLR)\n",
 "\n",
 "# Validation-set MSE of `fit`: mean squared prediction error for mpg over the\n",
 "# rows of `data` that are NOT in `train` (a vector of training row indices).\n",
 "# The response is read from `data` explicitly, so no attach() is needed and\n",
 "# nothing is added to the search path.\n",
 "validation.mse=function(fit,data,train)\n",
 "  mean((data$mpg-predict(fit,data))[-train]^2)\n",
 "\n",
 "# First split: half of the rows, chosen at random, form the training set\n",
 "set.seed(1)\n",
 "train=sample(nrow(Auto),nrow(Auto)%/%2)\n",
 "lm.fit=lm(mpg~horsepower,data=Auto,subset=train)\n",
 "validation.mse(lm.fit,Auto,train)\n",
 "lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train)\n",
 "validation.mse(lm.fit2,Auto,train)\n",
 "lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train)\n",
 "validation.mse(lm.fit3,Auto,train)\n",
 "\n",
 "# Second split: same three models on a different random training set\n",
 "set.seed(2)\n",
 "train=sample(nrow(Auto),nrow(Auto)%/%2)\n",
 "lm.fit=lm(mpg~horsepower,data=Auto,subset=train)\n",
 "validation.mse(lm.fit,Auto,train)\n",
 "lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train)\n",
 "validation.mse(lm.fit2,Auto,train)\n",
 "lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train)\n",
 "validation.mse(lm.fit3,Auto,train)"
 ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\t
(Intercept)
\n", "\t\t
39.9358610211705
\n", "\t
horsepower
\n", "\t\t
-0.157844733353654
\n", "
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[(Intercept)] 39.9358610211705\n", "\\item[horsepower] -0.157844733353654\n", "\\end{description*}\n" ], "text/markdown": [ "(Intercept)\n", ": 39.9358610211705horsepower\n", ": -0.157844733353654\n", "\n" ], "text/plain": [ "(Intercept) horsepower \n", " 39.9358610 -0.1578447 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\t
(Intercept)
\n", "\t\t
39.9358610211705
\n", "\t
horsepower
\n", "\t\t
-0.157844733353654
\n", "
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[(Intercept)] 39.9358610211705\n", "\\item[horsepower] -0.157844733353654\n", "\\end{description*}\n" ], "text/markdown": [ "(Intercept)\n", ": 39.9358610211705horsepower\n", ": -0.157844733353654\n", "\n" ], "text/plain": [ "(Intercept) horsepower \n", " 39.9358610 -0.1578447 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    \n", "\t
  1. 24.2315135179292
  2. \n", "\t
  3. 24.2311440937562
  4. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 24.2315135179292\n", "\\item 24.2311440937562\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 24.2315135179292\n", "2. 24.2311440937562\n", "\n", "\n" ], "text/plain": [ "[1] 24.23151 24.23114" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    \n", "\t
  1. 24.2315135179292
  2. \n", "\t
  3. 19.2482131244897
  4. \n", "\t
  5. 19.334984064029
  6. \n", "\t
  7. 19.4244303104302
  8. \n", "\t
  9. 19.0332138547041
  10. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 24.2315135179292\n", "\\item 19.2482131244897\n", "\\item 19.334984064029\n", "\\item 19.4244303104302\n", "\\item 19.0332138547041\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 24.2315135179292\n", "2. 19.2482131244897\n", "3. 19.334984064029\n", "4. 19.4244303104302\n", "5. 19.0332138547041\n", "\n", "\n" ], "text/plain": [ "[1] 24.23151 19.24821 19.33498 19.42443 19.03321" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Leave-One-Out Cross-Validation\n",
 "\n",
 "# Fit the same linear model with glm() and with lm(); the two coefficient\n",
 "# vectors displayed are identical.\n",
 "glm.fit <- glm(mpg ~ horsepower, data = Auto)\n",
 "coef(glm.fit)\n",
 "lm.fit <- lm(mpg ~ horsepower, data = Auto)\n",
 "coef(lm.fit)\n",
 "\n",
 "# cv.glm() comes from the boot package. It is called here without K; the\n",
 "# delta component of its result holds the two cross-validation estimates.\n",
 "library(boot)\n",
 "glm.fit <- glm(mpg ~ horsepower, data = Auto)\n",
 "cv.err <- cv.glm(Auto, glm.fit)\n",
 "cv.err$delta\n",
 "\n",
 "# Cross-validation error (first delta value) for polynomial degrees 1 to 5\n",
 "cv.error <- numeric(5)\n",
 "for (i in seq_along(cv.error)) {\n",
 "  glm.fit <- glm(mpg ~ poly(horsepower, i), data = Auto)\n",
 "  cv.error[i] <- cv.glm(Auto, glm.fit)$delta[1]\n",
 "}\n",
 "cv.error"
 ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 24.2051967567753
  2. \n", "\t
  3. 19.1892390663471
  4. \n", "\t
  5. 19.3066158967501
  6. \n", "\t
  7. 19.3379909004929
  8. \n", "\t
  9. 18.8791148363354
  10. \n", "\t
  11. 19.0210341885227
  12. \n", "\t
  13. 18.8960903802809
  14. \n", "\t
  15. 19.7120146188182
  16. \n", "\t
  17. 18.9514005667306
  18. \n", "\t
  19. 19.5019592285538
  20. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 24.2051967567753\n", "\\item 19.1892390663471\n", "\\item 19.3066158967501\n", "\\item 19.3379909004929\n", "\\item 18.8791148363354\n", "\\item 19.0210341885227\n", "\\item 18.8960903802809\n", "\\item 19.7120146188182\n", "\\item 18.9514005667306\n", "\\item 19.5019592285538\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 24.2051967567753\n", "2. 19.1892390663471\n", "3. 19.3066158967501\n", "4. 19.3379909004929\n", "5. 18.8791148363354\n", "6. 19.0210341885227\n", "7. 18.8960903802809\n", "8. 19.7120146188182\n", "9. 18.9514005667306\n", "10. 19.5019592285538\n", "\n", "\n" ], "text/plain": [ " [1] 24.20520 19.18924 19.30662 19.33799 18.87911 19.02103 18.89609 19.71201\n", " [9] 18.95140 19.50196" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# k-Fold Cross-Validation\n",
 "\n",
 "# 10-fold cross-validation error (K = 10, first delta value) for polynomial\n",
 "# degrees 1 to 10. The seed is fixed before the loop so the run is repeatable.\n",
 "set.seed(17)\n",
 "cv.error.10 <- numeric(10)\n",
 "for (i in seq_along(cv.error.10)) {\n",
 "  glm.fit <- glm(mpg ~ poly(horsepower, i), data = Auto)\n",
 "  cv.error.10[i] <- cv.glm(Auto, glm.fit, K = 10)$delta[1]\n",
 "}\n",
 "cv.error.10"
 ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "0.57583207459283" ], "text/latex": [ "0.57583207459283" ], "text/markdown": [ "0.57583207459283" ], "text/plain": [ "[1] 0.5758321" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "0.596383302006392" ], "text/latex": [ "0.596383302006392" ], "text/markdown": [ "0.596383302006392" ], "text/plain": [ "[1] 0.5963833" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "\n", "ORDINARY NONPARAMETRIC BOOTSTRAP\n", "\n", "\n", "Call:\n", "boot(data = Portfolio, statistic = alpha.fn, R = 1000)\n", "\n", "\n", "Bootstrap Statistics :\n", " original bias std. 
error\n", "t1* 0.5758321 -7.315422e-05 0.08861826" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# The Bootstrap\n",
 "\n",
 "# Bootstrap statistic for the Portfolio data. Computes\n",
 "#   (var(Y) - cov(X,Y)) / (var(X) + var(Y) - 2*cov(X,Y))\n",
 "# from the rows of `data` selected by `index`.\n",
 "#   data  - data frame with numeric columns X and Y\n",
 "#   index - row indices to use (repeats allowed, as in a bootstrap sample)\n",
 "alpha.fn=function(data,index){\n",
 "  X=data$X[index]\n",
 "  Y=data$Y[index]\n",
 "  cov.xy=cov(X,Y)  # appears in both numerator and denominator\n",
 "  (var(Y)-cov.xy)/(var(X)+var(Y)-2*cov.xy)\n",
 "}\n",
 "\n",
 "# Statistic on the original sample (every row exactly once)\n",
 "alpha.fn(Portfolio,seq_len(nrow(Portfolio)))\n",
 "\n",
 "# One hand-made bootstrap replicate (rows drawn with replacement), followed by\n",
 "# 1000 replicates generated by boot()\n",
 "set.seed(1)\n",
 "alpha.fn(Portfolio,sample(nrow(Portfolio),nrow(Portfolio),replace=TRUE))\n",
 "boot(Portfolio,alpha.fn,R=1000)"
 ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\t
(Intercept)
\n", "\t\t
39.9358610211705
\n", "\t
horsepower
\n", "\t\t
-0.157844733353654
\n", "
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[(Intercept)] 39.9358610211705\n", "\\item[horsepower] -0.157844733353654\n", "\\end{description*}\n" ], "text/markdown": [ "(Intercept)\n", ": 39.9358610211705horsepower\n", ": -0.157844733353654\n", "\n" ], "text/plain": [ "(Intercept) horsepower \n", " 39.9358610 -0.1578447 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\t
(Intercept)
\n", "\t\t
38.7387133554397
\n", "\t
horsepower
\n", "\t\t
-0.148195186363759
\n", "
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[(Intercept)] 38.7387133554397\n", "\\item[horsepower] -0.148195186363759\n", "\\end{description*}\n" ], "text/markdown": [ "(Intercept)\n", ": 38.7387133554397horsepower\n", ": -0.148195186363759\n", "\n" ], "text/plain": [ "(Intercept) horsepower \n", " 38.7387134 -0.1481952 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\t
(Intercept)
\n", "\t\t
40.0383085722796
\n", "\t
horsepower
\n", "\t\t
-0.159610359262947
\n", "
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[(Intercept)] 40.0383085722796\n", "\\item[horsepower] -0.159610359262947\n", "\\end{description*}\n" ], "text/markdown": [ "(Intercept)\n", ": 40.0383085722796horsepower\n", ": -0.159610359262947\n", "\n" ], "text/plain": [ "(Intercept) horsepower \n", " 40.0383086 -0.1596104 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "\n", "ORDINARY NONPARAMETRIC BOOTSTRAP\n", "\n", "\n", "Call:\n", "boot(data = Auto, statistic = boot.fn, R = 1000)\n", "\n", "\n", "Bootstrap Statistics :\n", " original bias std. error\n", "t1* 39.9358610 0.02972191 0.860007896\n", "t2* -0.1578447 -0.00030823 0.007404467" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Estimating the Accuracy of a Linear Regression Model\n",
 "\n",
 "# Bootstrap statistic: coefficients (intercept and slope) of the linear fit\n",
 "# of mpg on horsepower, using only the rows of `data` listed in `index`.\n",
 "#   data  - data frame with columns mpg and horsepower\n",
 "#   index - row indices to fit on (repeats allowed)\n",
 "boot.fn=function(data,index)\n",
 "  coef(lm(mpg~horsepower,data=data,subset=index))\n",
 "\n",
 "# Coefficients on the full data set\n",
 "boot.fn(Auto,seq_len(nrow(Auto)))\n",
 "\n",
 "# Two hand-made bootstrap replicates (rows drawn with replacement) ...\n",
 "set.seed(1)\n",
 "boot.fn(Auto,sample(nrow(Auto),nrow(Auto),replace=TRUE))\n",
 "boot.fn(Auto,sample(nrow(Auto),nrow(Auto),replace=TRUE))\n",
 "\n",
 "# ... then 1000 replicates via boot(), which reports bootstrap standard errors\n",
 "boot(Auto,boot.fn,R=1000)"
 ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\n", "
EstimateStd. Errort valuePr(>|t|)
(Intercept) 3.993586e+01 7.174987e-01 5.565984e+011.220362e-187
horsepower-1.578447e-01 6.445501e-03-2.448914e+01 7.031989e-81
\n" ], "text/latex": [ "\\begin{tabular}{r|llll}\n", " & Estimate & Std. Error & t value & Pr(>\\textbar{}t\\textbar{})\\\\\n", "\\hline\n", "\t(Intercept) & 3.993586e+01 & 7.174987e-01 & 5.565984e+01 & 1.220362e-187\\\\\n", "\thorsepower & -1.578447e-01 & 6.445501e-03 & -2.448914e+01 & 7.031989e-81\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "1. 39.9358610211705\n", "2. -0.157844733353654\n", "3. 0.717498655554526\n", "4. 0.00644550051768504\n", "5. 55.6598409098141\n", "6. -24.4891351603436\n", "7. 1.22036159610484e-187\n", "8. 7.03198902940366e-81\n", "\n", "\n" ], "text/plain": [ " Estimate Std. Error t value Pr(>|t|)\n", "(Intercept) 39.9358610 0.717498656 55.65984 1.220362e-187\n", "horsepower -0.1578447 0.006445501 -24.48914 7.031989e-81" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Coefficient table (estimate, std. error, t value, p-value) of the linear\n",
 "# model on the full data. coef() is used on the summary object instead of\n",
 "# $coef, which only works through partial matching of `coefficients`.\n",
 "coef(summary(lm(mpg~horsepower,data=Auto)))"
 ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [
 "# Bootstrap statistic for the quadratic model: coefficients of the fit of mpg\n",
 "# on horsepower and horsepower^2, using only the rows of `data` in `index`.\n",
 "# NOTE: this reuses the name boot.fn, so from here on it replaces the\n",
 "# linear-model version defined in an earlier cell.\n",
 "boot.fn=function(data,index)\n",
 "  coef(lm(mpg~horsepower+I(horsepower^2),data=data,subset=index))"
 ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "\n", "ORDINARY NONPARAMETRIC BOOTSTRAP\n", "\n", "\n", "Call:\n", "boot(data = Auto, statistic = boot.fn, R = 1000)\n", "\n", "\n", "Bootstrap Statistics :\n", " original bias std. error\n", "t1* 56.900099702 6.098115e-03 2.0944855842\n", "t2* -0.466189630 -1.777108e-04 0.0334123802\n", "t3* 0.001230536 1.324315e-06 0.0001208339" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\n", "
EstimateStd. Errort valuePr(>|t|)
(Intercept) 5.690010e+01 1.800427e+00 3.160367e+011.740911e-109
horsepower-4.661896e-01 3.112462e-02-1.497816e+01 2.289429e-40
I(horsepower^2)1.230536e-031.220759e-041.008009e+012.196340e-21
\n" ], "text/latex": [ "\\begin{tabular}{r|llll}\n", " & Estimate & Std. Error & t value & Pr(>\\textbar{}t\\textbar{})\\\\\n", "\\hline\n", "\t(Intercept) & 5.690010e+01 & 1.800427e+00 & 3.160367e+01 & 1.740911e-109\\\\\n", "\thorsepower & -4.661896e-01 & 3.112462e-02 & -1.497816e+01 & 2.289429e-40\\\\\n", "\tI(horsepower\\textasciicircum{}2) & 1.230536e-03 & 1.220759e-04 & 1.008009e+01 & 2.196340e-21\\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "1. 56.900099702113\n", "2. -0.466189629947353\n", "3. 0.00123053610077392\n", "4. 1.80042680630742\n", "5. 0.0311246171195556\n", "6. 0.000122075862760411\n", "7. 31.603672808456\n", "8. -14.978164330717\n", "9. 10.0800934185409\n", "10. 1.74091142668943e-109\n", "11. 2.28942870293134e-40\n", "12. 2.19633965988939e-21\n", "\n", "\n" ], "text/plain": [ " Estimate Std. Error t value Pr(>|t|)\n", "(Intercept) 56.900099702 1.8004268063 31.60367 1.740911e-109\n", "horsepower -0.466189630 0.0311246171 -14.97816 2.289429e-40\n", "I(horsepower^2) 0.001230536 0.0001220759 10.08009 2.196340e-21" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Bootstrap standard errors for the quadratic model (boot.fn as defined in\n",
 "# the previous cell), 1000 replicates with a fixed seed\n",
 "set.seed(1)\n",
 "boot(Auto,boot.fn,R=1000)\n",
 "\n",
 "# Coefficient table of the same model on the full data, for comparison.\n",
 "# coef() is used on the summary object instead of $coef, which only works\n",
 "# through partial matching of `coefficients`.\n",
 "coef(summary(lm(mpg~horsepower+I(horsepower^2),data=Auto)))"
 ] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "3.3.1" } }, "nbformat": 4, "nbformat_minor": 0 }