{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"26.1414211520072"
],
"text/latex": [
"26.1414211520072"
],
"text/markdown": [
"26.1414211520072"
],
"text/plain": [
"[1] 26.14142"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"19.8225850408262"
],
"text/latex": [
"19.8225850408262"
],
"text/markdown": [
"19.8225850408262"
],
"text/plain": [
"[1] 19.82259"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"19.7825166856023"
],
"text/latex": [
"19.7825166856023"
],
"text/markdown": [
"19.7825166856023"
],
"text/plain": [
"[1] 19.78252"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"23.2955851508862"
],
"text/latex": [
"23.2955851508862"
],
"text/markdown": [
"23.2955851508862"
],
"text/plain": [
"[1] 23.29559"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"18.9012408317778"
],
"text/latex": [
"18.9012408317778"
],
"text/markdown": [
"18.9012408317778"
],
"text/plain": [
"[1] 18.90124"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"19.2573982608642"
],
"text/latex": [
"19.2573982608642"
],
"text/markdown": [
"19.2573982608642"
],
"text/plain": [
"[1] 19.2574"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# The Validation Set Approach\n",
"# Fit linear, quadratic and cubic regressions of mpg on horsepower on a random\n",
"# half of Auto and report the mean squared error on the held-out half.\n",
"# The whole procedure is run twice, with seeds 1 and 2, to show how much the\n",
"# estimate depends on the particular split.\n",
"\n",
"library(ISLR)\n",
"\n",
"# First split: 196 of the 392 row indices are drawn for training.\n",
"set.seed(1)\n",
"train=sample(392,196)\n",
"lm.fit=lm(mpg~horsepower,data=Auto,subset=train)\n",
"# attach() puts Auto's columns on the search path; the bare `mpg` in the MSE\n",
"# expressions below resolves through it.\n",
"attach(Auto)\n",
"# Predict for every row, then keep only the squared errors of rows not in train.\n",
"mean((mpg-predict(lm.fit,Auto))[-train]^2)\n",
"lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train)\n",
"mean((mpg-predict(lm.fit2,Auto))[-train]^2)\n",
"lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train)\n",
"mean((mpg-predict(lm.fit3,Auto))[-train]^2)\n",
"\n",
"# Second split: same three models on a different random half.\n",
"set.seed(2)\n",
"train=sample(392,196)\n",
"# data=Auto is passed explicitly so this fit does not depend on attach(Auto),\n",
"# in line with every other model fit in this notebook.\n",
"lm.fit=lm(mpg~horsepower,data=Auto,subset=train)\n",
"mean((mpg-predict(lm.fit,Auto))[-train]^2)\n",
"lm.fit2=lm(mpg~poly(horsepower,2),data=Auto,subset=train)\n",
"mean((mpg-predict(lm.fit2,Auto))[-train]^2)\n",
"lm.fit3=lm(mpg~poly(horsepower,3),data=Auto,subset=train)\n",
"mean((mpg-predict(lm.fit3,Auto))[-train]^2)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<dl class=dl-horizontal>\n",
"\t<dt>(Intercept)</dt>\n",
"\t\t<dd>39.9358610211705</dd>\n",
"\t<dt>horsepower</dt>\n",
"\t\t<dd>-0.157844733353654</dd>\n",
"</dl>\n"
],
"text/latex": [
"\\begin{description*}\n",
"\\item[(Intercept)] 39.9358610211705\n",
"\\item[horsepower] -0.157844733353654\n",
"\\end{description*}\n"
],
"text/markdown": [
"(Intercept)\n",
": 39.9358610211705horsepower\n",
": -0.157844733353654\n",
"\n"
],
"text/plain": [
"(Intercept) horsepower \n",
" 39.9358610 -0.1578447 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<dl class=dl-horizontal>\n",
"\t<dt>(Intercept)</dt>\n",
"\t\t<dd>39.9358610211705</dd>\n",
"\t<dt>horsepower</dt>\n",
"\t\t<dd>-0.157844733353654</dd>\n",
"</dl>\n"
],
"text/latex": [
"\\begin{description*}\n",
"\\item[(Intercept)] 39.9358610211705\n",
"\\item[horsepower] -0.157844733353654\n",
"\\end{description*}\n"
],
"text/markdown": [
"(Intercept)\n",
": 39.9358610211705horsepower\n",
": -0.157844733353654\n",
"\n"
],
"text/plain": [
"(Intercept) horsepower \n",
" 39.9358610 -0.1578447 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<ol class=list-inline>\n",
"\t<li>24.2315135179292</li>\n",
"\t<li>24.2311440937562</li>\n",
"</ol>\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 24.2315135179292\n",
"\\item 24.2311440937562\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 24.2315135179292\n",
"2. 24.2311440937562\n",
"\n",
"\n"
],
"text/plain": [
"[1] 24.23151 24.23114"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<ol class=list-inline>\n",
"\t<li>24.2315135179292</li>\n",
"\t<li>19.2482131244897</li>\n",
"\t<li>19.334984064029</li>\n",
"\t<li>19.4244303104302</li>\n",
"\t<li>19.0332138547041</li>\n",
"</ol>\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 24.2315135179292\n",
"\\item 19.2482131244897\n",
"\\item 19.334984064029\n",
"\\item 19.4244303104302\n",
"\\item 19.0332138547041\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 24.2315135179292\n",
"2. 19.2482131244897\n",
"3. 19.334984064029\n",
"4. 19.4244303104302\n",
"5. 19.0332138547041\n",
"\n",
"\n"
],
"text/plain": [
"[1] 24.23151 19.24821 19.33498 19.42443 19.03321"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Leave-One-Out Cross-Validation\n",
"\n",
"# glm() with no family argument and lm() are fitted to the same formula so\n",
"# their coefficients can be compared side by side.\n",
"glm.fit <- glm(mpg ~ horsepower, data = Auto)\n",
"coef(glm.fit)\n",
"lm.fit <- lm(mpg ~ horsepower, data = Auto)\n",
"coef(lm.fit)\n",
"\n",
"# cv.glm() comes from the boot package; K is left at its default here.\n",
"library(boot)\n",
"glm.fit <- glm(mpg ~ horsepower, data = Auto)\n",
"cv.err <- cv.glm(Auto, glm.fit)\n",
"cv.err$delta\n",
"\n",
"# Repeat for polynomial degrees 1 to 5, keeping the first delta component.\n",
"cv.error <- numeric(5)\n",
"for (i in seq_len(5)) {\n",
"  glm.fit <- glm(mpg ~ poly(horsepower, i), data = Auto)\n",
"  cv.error[i] <- cv.glm(Auto, glm.fit)$delta[1]\n",
"}\n",
"cv.error"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<ol class=list-inline>\n",
"\t<li>24.2051967567753</li>\n",
"\t<li>19.1892390663471</li>\n",
"\t<li>19.3066158967501</li>\n",
"\t<li>19.3379909004929</li>\n",
"\t<li>18.8791148363354</li>\n",
"\t<li>19.0210341885227</li>\n",
"\t<li>18.8960903802809</li>\n",
"\t<li>19.7120146188182</li>\n",
"\t<li>18.9514005667306</li>\n",
"\t<li>19.5019592285538</li>\n",
"</ol>\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 24.2051967567753\n",
"\\item 19.1892390663471\n",
"\\item 19.3066158967501\n",
"\\item 19.3379909004929\n",
"\\item 18.8791148363354\n",
"\\item 19.0210341885227\n",
"\\item 18.8960903802809\n",
"\\item 19.7120146188182\n",
"\\item 18.9514005667306\n",
"\\item 19.5019592285538\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 24.2051967567753\n",
"2. 19.1892390663471\n",
"3. 19.3066158967501\n",
"4. 19.3379909004929\n",
"5. 18.8791148363354\n",
"6. 19.0210341885227\n",
"7. 18.8960903802809\n",
"8. 19.7120146188182\n",
"9. 18.9514005667306\n",
"10. 19.5019592285538\n",
"\n",
"\n"
],
"text/plain": [
" [1] 24.20520 19.18924 19.30662 19.33799 18.87911 19.02103 18.89609 19.71201\n",
" [9] 18.95140 19.50196"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# k-Fold Cross-Validation\n",
"\n",
"# Seed the generator first so the 10-fold results below can be reproduced.\n",
"set.seed(17)\n",
"\n",
"# 10-fold CV error (first delta component) for polynomial degrees 1 to 10.\n",
"cv.error.10 <- numeric(10)\n",
"for (i in seq_len(10)) {\n",
"  glm.fit <- glm(mpg ~ poly(horsepower, i), data = Auto)\n",
"  cv.error.10[i] <- cv.glm(Auto, glm.fit, K = 10)$delta[1]\n",
"}\n",
"cv.error.10"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"0.57583207459283"
],
"text/latex": [
"0.57583207459283"
],
"text/markdown": [
"0.57583207459283"
],
"text/plain": [
"[1] 0.5758321"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"0.596383302006392"
],
"text/latex": [
"0.596383302006392"
],
"text/markdown": [
"0.596383302006392"
],
"text/plain": [
"[1] 0.5963833"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"\n",
"ORDINARY NONPARAMETRIC BOOTSTRAP\n",
"\n",
"\n",
"Call:\n",
"boot(data = Portfolio, statistic = alpha.fn, R = 1000)\n",
"\n",
"\n",
"Bootstrap Statistics :\n",
" original bias std. error\n",
"t1* 0.5758321 -7.315422e-05 0.08861826"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# The Bootstrap\n",
"\n",
"# Statistic for boot(): using the rows of `data` selected by `index`, take\n",
"# columns X and Y and return\n",
"#   (var(Y) - cov(X,Y)) / (var(X) + var(Y) - 2*cov(X,Y)).\n",
"alpha.fn <- function(data, index) {\n",
"  X <- data$X[index]\n",
"  Y <- data$Y[index]\n",
"  cov.xy <- cov(X, Y)  # used in both numerator and denominator\n",
"  (var(Y) - cov.xy) / (var(X) + var(Y) - 2 * cov.xy)\n",
"}\n",
"\n",
"# Estimate from rows 1 to 100 in their original order.\n",
"alpha.fn(Portfolio, 1:100)\n",
"\n",
"# One manual bootstrap replicate: 100 row indices drawn with replacement.\n",
"# TRUE is spelled out because T is an ordinary variable that can be reassigned.\n",
"set.seed(1)\n",
"alpha.fn(Portfolio, sample(100, 100, replace = TRUE))\n",
"\n",
"# 1000 bootstrap replicates of the same statistic.\n",
"boot(Portfolio, alpha.fn, R = 1000)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<dl class=dl-horizontal>\n",
"\t<dt>(Intercept)</dt>\n",
"\t\t<dd>39.9358610211705</dd>\n",
"\t<dt>horsepower</dt>\n",
"\t\t<dd>-0.157844733353654</dd>\n",
"</dl>\n"
],
"text/latex": [
"\\begin{description*}\n",
"\\item[(Intercept)] 39.9358610211705\n",
"\\item[horsepower] -0.157844733353654\n",
"\\end{description*}\n"
],
"text/markdown": [
"(Intercept)\n",
": 39.9358610211705horsepower\n",
": -0.157844733353654\n",
"\n"
],
"text/plain": [
"(Intercept) horsepower \n",
" 39.9358610 -0.1578447 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<dl class=dl-horizontal>\n",
"\t<dt>(Intercept)</dt>\n",
"\t\t<dd>38.7387133554397</dd>\n",
"\t<dt>horsepower</dt>\n",
"\t\t<dd>-0.148195186363759</dd>\n",
"</dl>\n"
],
"text/latex": [
"\\begin{description*}\n",
"\\item[(Intercept)] 38.7387133554397\n",
"\\item[horsepower] -0.148195186363759\n",
"\\end{description*}\n"
],
"text/markdown": [
"(Intercept)\n",
": 38.7387133554397horsepower\n",
": -0.148195186363759\n",
"\n"
],
"text/plain": [
"(Intercept) horsepower \n",
" 38.7387134 -0.1481952 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<dl class=dl-horizontal>\n",
"\t<dt>(Intercept)</dt>\n",
"\t\t<dd>40.0383085722796</dd>\n",
"\t<dt>horsepower</dt>\n",
"\t\t<dd>-0.159610359262947</dd>\n",
"</dl>\n"
],
"text/latex": [
"\\begin{description*}\n",
"\\item[(Intercept)] 40.0383085722796\n",
"\\item[horsepower] -0.159610359262947\n",
"\\end{description*}\n"
],
"text/markdown": [
"(Intercept)\n",
": 40.0383085722796horsepower\n",
": -0.159610359262947\n",
"\n"
],
"text/plain": [
"(Intercept) horsepower \n",
" 40.0383086 -0.1596104 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"\n",
"ORDINARY NONPARAMETRIC BOOTSTRAP\n",
"\n",
"\n",
"Call:\n",
"boot(data = Auto, statistic = boot.fn, R = 1000)\n",
"\n",
"\n",
"Bootstrap Statistics :\n",
" original bias std. error\n",
"t1* 39.9358610 0.02972191 0.860007896\n",
"t2* -0.1578447 -0.00030823 0.007404467"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Estimating the Accuracy of a Linear Regression Model\n",
"\n",
"# Statistic for boot(): intercept and slope of mpg ~ horsepower fitted to the\n",
"# rows of `data` selected by `index`.\n",
"boot.fn <- function(data, index) {\n",
"  coef(lm(mpg ~ horsepower, data = data, subset = index))\n",
"}\n",
"\n",
"# Coefficients from rows 1 to 392 in their original order.\n",
"boot.fn(Auto, 1:392)\n",
"\n",
"# Two manual bootstrap replicates: 392 row indices drawn with replacement.\n",
"# TRUE is spelled out because T is an ordinary variable that can be reassigned.\n",
"set.seed(1)\n",
"boot.fn(Auto, sample(392, 392, replace = TRUE))\n",
"boot.fn(Auto, sample(392, 392, replace = TRUE))\n",
"\n",
"# 1000 bootstrap replicates; R is named to match the Portfolio example.\n",
"boot(Auto, boot.fn, R = 1000)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
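"<table>\n",
"<thead><tr><th></th><th scope=col>Estimate</th><th scope=col>Std. Error</th><th scope=col>t value</th><th scope=col>Pr(&gt;|t|)</th></tr></thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>(Intercept)</th><td>3.993586e+01</td><td>7.174987e-01</td><td>5.565984e+01</td><td>1.220362e-187</td></tr>\n",
"\t<tr><th scope=row>horsepower</th><td>-1.578447e-01</td><td>6.445501e-03</td><td>-2.448914e+01</td><td>7.031989e-81</td></tr>\n",
"</tbody>\n",
"</table>\n"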
],
"text/latex": [
"\\begin{tabular}{r|llll}\n",
" & Estimate & Std. Error & t value & Pr(>\\textbar{}t\\textbar{})\\\\\n",
"\\hline\n",
"\t(Intercept) & 3.993586e+01 & 7.174987e-01 & 5.565984e+01 & 1.220362e-187\\\\\n",
"\thorsepower & -1.578447e-01 & 6.445501e-03 & -2.448914e+01 & 7.031989e-81\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"1. 39.9358610211705\n",
"2. -0.157844733353654\n",
"3. 0.717498655554526\n",
"4. 0.00644550051768504\n",
"5. 55.6598409098141\n",
"6. -24.4891351603436\n",
"7. 1.22036159610484e-187\n",
"8. 7.03198902940366e-81\n",
"\n",
"\n"
],
"text/plain": [
" Estimate Std. Error t value Pr(>|t|)\n",
"(Intercept) 39.9358610 0.717498656 55.65984 1.220362e-187\n",
"horsepower -0.1578447 0.006445501 -24.48914 7.031989e-81"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Coefficient table (estimate, std. error, t value, p-value) for the linear fit.\n",
"# coef() is used on the summary object instead of `$coef`, which only works\n",
"# through partial matching of the `coefficients` element name.\n",
"coef(summary(lm(mpg ~ horsepower, data = Auto)))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Bootstrap statistic, redefined for the quadratic model: coefficients of\n",
"# mpg ~ horsepower + horsepower^2 fitted to the rows of `data` picked by `index`.\n",
"boot.fn <- function(data, index) {\n",
"  quad.fit <- lm(mpg ~ horsepower + I(horsepower^2), data = data, subset = index)\n",
"  coefficients(quad.fit)\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"\n",
"ORDINARY NONPARAMETRIC BOOTSTRAP\n",
"\n",
"\n",
"Call:\n",
"boot(data = Auto, statistic = boot.fn, R = 1000)\n",
"\n",
"\n",
"Bootstrap Statistics :\n",
" original bias std. error\n",
"t1* 56.900099702 6.098115e-03 2.0944855842\n",
"t2* -0.466189630 -1.777108e-04 0.0334123802\n",
"t3* 0.001230536 1.324315e-06 0.0001208339"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
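"<table>\n",
"<thead><tr><th></th><th scope=col>Estimate</th><th scope=col>Std. Error</th><th scope=col>t value</th><th scope=col>Pr(&gt;|t|)</th></tr></thead>\n",
"<tbody>\n",
"\t<tr><th scope=row>(Intercept)</th><td>5.690010e+01</td><td>1.800427e+00</td><td>3.160367e+01</td><td>1.740911e-109</td></tr>\n",
"\t<tr><th scope=row>horsepower</th><td>-4.661896e-01</td><td>3.112462e-02</td><td>-1.497816e+01</td><td>2.289429e-40</td></tr>\n",
"\t<tr><th scope=row>I(horsepower^2)</th><td>1.230536e-03</td><td>1.220759e-04</td><td>1.008009e+01</td><td>2.196340e-21</td></tr>\n",
"</tbody>\n",
"</table>\n"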
],
"text/latex": [
"\\begin{tabular}{r|llll}\n",
" & Estimate & Std. Error & t value & Pr(>\\textbar{}t\\textbar{})\\\\\n",
"\\hline\n",
"\t(Intercept) & 5.690010e+01 & 1.800427e+00 & 3.160367e+01 & 1.740911e-109\\\\\n",
"\thorsepower & -4.661896e-01 & 3.112462e-02 & -1.497816e+01 & 2.289429e-40\\\\\n",
"\tI(horsepower\\textasciicircum{}2) & 1.230536e-03 & 1.220759e-04 & 1.008009e+01 & 2.196340e-21\\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"1. 56.900099702113\n",
"2. -0.466189629947353\n",
"3. 0.00123053610077392\n",
"4. 1.80042680630742\n",
"5. 0.0311246171195556\n",
"6. 0.000122075862760411\n",
"7. 31.603672808456\n",
"8. -14.978164330717\n",
"9. 10.0800934185409\n",
"10. 1.74091142668943e-109\n",
"11. 2.28942870293134e-40\n",
"12. 2.19633965988939e-21\n",
"\n",
"\n"
],
"text/plain": [
" Estimate Std. Error t value Pr(>|t|)\n",
"(Intercept) 56.900099702 1.8004268063 31.60367 1.740911e-109\n",
"horsepower -0.466189630 0.0311246171 -14.97816 2.289429e-40\n",
"I(horsepower^2) 0.001230536 0.0001220759 10.08009 2.196340e-21"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bootstrap the coefficients returned by boot.fn (1000 replicates), then show\n",
"# the coefficient table from summary() for the quadratic fit on the full data.\n",
"set.seed(1)\n",
"boot(Auto, boot.fn, R = 1000)\n",
"# coef() is used on the summary object instead of `$coef`, which only works\n",
"# through partial matching of the `coefficients` element name.\n",
"coef(summary(lm(mpg ~ horsepower + I(horsepower^2), data = Auto)))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "3.3.1"
}
},
"nbformat": 4,
"nbformat_minor": 0
}