---
title: "Graphing Residuals"
date: 'Last compiled: `r format(Sys.time(), "%A, %B %d, %Y - %X.")`'
author: 'Alan T. Arnholt'
output: bookdown::html_document2
---

```{r label = "setup", include = FALSE, message = FALSE}
library(ggplot2)
# Document-wide chunk defaults: show code, drop the "##" output prefix,
# center figures, and keep warnings/messages out of the rendered output.
knitr::opts_chunk$set(
  echo = TRUE,
  comment = NA,
  fig.align = "center",
  warning = FALSE,
  message = FALSE
)
```

# (APPENDIX) Appendix A {-}

## Residuals

The $i^{th}$ residual is defined as $\hat{\epsilon}_i = y_i - \hat{y}_i$.

```{r}
library(dplyr)
library(PASWR2)
head(HSWRESTLER)

# Null (intercept-only) model; its fitted intercept is the sample mean of
# hwfat, which the summarize() call below reproduces.
mod_null <- lm(hwfat ~ 1, data = HSWRESTLER)
summary(mod_null)
anova(mod_null)
summarize(HSWRESTLER, Intercept = mean(hwfat))

# Simple linear regression of hwfat on abs.
mod_slr <- lm(hwfat ~ abs, data = HSWRESTLER)
summary(mod_slr)
anova(mod_slr)
```

* Use the function `augment` from the `broom` package

```{r}
library(broom)
round(head(augment(mod_slr)), 3)

NDF1 <- augment(mod_slr)
NDF2 <- augment(mod_null)
# The null model has no predictor, so abs is attached by hand for plotting.
NDF2[["abs"]] <- HSWRESTLER[["abs"]]
```

## Graphing

### Residuals for SLR model

```{r}
# Layers inherit x = abs and y = hwfat from the plot-level mapping; each
# segment joins an observed value to its fitted value (the residual).
ggplot(NDF1, aes(x = abs, y = hwfat)) +
  geom_point(color = "purple") +
  geom_line(aes(y = .fitted), color = "gray") +
  geom_segment(aes(xend = abs, yend = .fitted), size = 0.25) +
  theme_bw()
```

### Residuals for Null model

```{r}
# Same display for the null model: the fitted line is horizontal.
ggplot(NDF2, aes(x = abs, y = hwfat)) +
  geom_point(color = "purple") +
  geom_line(aes(y = .fitted), color = "gray") +
  geom_segment(aes(xend = abs, yend = .fitted), size = 0.25) +
  theme_bw()
```

### Creating one dataframe with new column

The code below creates one dataframe (`ndf`) with the information from the `null` model appended to the bottom of the information from the `slr` model.
```{r}
ndf1 <- augment(mod_slr)
ndf2 <- augment(mod_null)
DT::datatable(head(ndf1))
DT::datatable(head(ndf2))

# abs is not a column of ndf2 (the null model has no predictor), so add it.
ndf2$abs <- HSWRESTLER$abs

# Stack the two augmented data frames and label each row with its model.
# The label counts come from the pieces themselves rather than a
# hard-coded sample size, so the code survives a change in the data.
ndf <- bind_rows(ndf1, ndf2) %>%
  mutate(model = rep(c("slr", "null"), times = c(nrow(ndf1), nrow(ndf2))))
DT::datatable(ndf)
```

```{r, fig.width = 8}
# One panel per model; dotted segments are the residuals.
ggplot(data = ndf, aes(x = abs, y = hwfat)) +
  facet_grid(. ~ model) +
  geom_line(aes(y = .fitted), color = "gray") +
  theme_bw() +
  geom_segment(
    aes(xend = abs, yend = .fitted),
    size = 0.25,
    linetype = "dotted"
  ) +
  geom_point(color = "lightblue")
```

The graph above is recreated with a figure caption and hidden code in Figure \@ref(fig:BG).

### Tweakers

Note that `mod_null` and `mod_slr` are lists with the results from the `lm` call.

```{r}
str(mod_null)
str(mod_slr)
```

Consider the following, which replaces each fitted model object with its augmented data frame:

```{r}
# From here on mod_null and mod_slr are data frames, not lm objects.
mod_null <- mod_null %>% augment()
mod_null$abs <- HSWRESTLER$abs
mod_slr <- mod_slr %>% augment()
```

```{r}
DT::datatable(mod_null)
```

* Compute the mean squared error ($MSE$) for the null model `mod_null`.

```{r}
# Compute MSE for the null model: with only an intercept, the residual
# degrees of freedom are n - 1, so MSE = SSE / (n - 1) = var(hwfat).
mod_null %>%
  summarize(MSE = var(hwfat))
```

* Compute the mean squared error ($MSE$) for the regression model `mod_slr`.
```{r}
# Compute MSE for the regression model: SSE divided by the residual
# degrees of freedom, n - 2 (intercept and slope are estimated).
# var(.resid) would divide by n - 1 and understate the MSE.
mod_slr %>%
  summarize(MSE = sum(.resid^2) / (n() - 2))
```

## Graph with caption

```{r, label = "BG", echo = FALSE, fig.cap = "What you want your caption to say goes here", fig.width = 8}
# Refit both models: by this point mod_null and mod_slr hold augmented
# data frames rather than lm objects.
mod_null <- lm(hwfat ~ 1, data = HSWRESTLER)
mod_slr <- lm(hwfat ~ abs, data = HSWRESTLER)

ndf1 <- augment(mod_slr)
ndf2 <- augment(mod_null)
# abs is not a column of ndf2 (the null model has no predictor), so add it.
ndf2$abs <- HSWRESTLER$abs

# Label rows by model using the actual row counts, not a hard-coded 78.
ndf <- bind_rows(ndf1, ndf2) %>%
  mutate(model = rep(c("slr", "null"), times = c(nrow(ndf1), nrow(ndf2))))

# Put mod_null and mod_slr back to the augmented data frames they held
# before this hidden chunk ran, including the abs column on mod_null.
mod_null <- mod_null %>% augment()
mod_null$abs <- HSWRESTLER$abs
mod_slr <- mod_slr %>% augment()

# One panel per model; dotted segments are the residuals.
ggplot(data = ndf, aes(x = abs, y = hwfat)) +
  facet_grid(. ~ model) +
  geom_line(aes(y = .fitted), color = "gray") +
  theme_bw() +
  geom_segment(
    aes(xend = abs, yend = .fitted),
    size = 0.25,
    linetype = "dotted"
  ) +
  geom_point(color = "blue")
```