# function for a full linear model that haves covariates and factors lm_out <- function(fit_model = fitted_model, out_num = 2){ require(tidyverse) data <- fit_model[["model"]] #mahalanobis data_mahal <- data %>% select_if(is.numeric) mahal_model <- data_mahal %>% mahalanobis(.,colMeans(.),cov(.)) cut_mahal <- qchisq(1-.001, ncol(data_mahal)) df_mahal <- ncol(data_mahal) badmahal <- as.numeric(mahal_model > cut_mahal) out_mahal <- table(badmahal) message_mahal <- paste(c("the cut off for mahalanobis values considering a p-value of less than .001 was chi-square(", df_mahal, ") = ", cut_mahal), collapse = '') #leverage leverage <- hatvalues(fit_model) iv_number <- (length(coef(fit_model)) - 1) #minus the intercept cutleverage <- (2*iv_number+2) / nrow(data) message_leverage = paste(c("the cut off for leverage values was ", cutleverage), collapse = '') badleverage <- as.numeric(leverage > cutleverage) out_leverage <- table(badleverage) #cooks cooks <- cooks.distance(fit_model) cutcooks <- 4 / (nrow(data) - iv_number - 1) message_cooks <- paste(c("the cut off for cooks values was ", cutcooks), collapse = '') badcooks <- as.numeric(cooks > cutcooks) out_cooks <- table(badcooks) #total totalout <- badmahal + badleverage + badcooks total_outliers = table(totalout) #new data noout_data <- subset(data, totalout < out_num) noout_data <- as_tibble(noout_data) excluded_outs <- (NROW(data) - NROW(noout_data)) output = list(total_outliers = total_outliers, #totalout = totalout, message_mahal = message_mahal, message_leverage = message_leverage, message_cooks = message_cooks, out_mahal = out_mahal, out_cooks = out_cooks, out_leverage = out_leverage, noout_data = noout_data) print(paste(c("a total of ", excluded_outs, " outliers should be excluded"), collapse = '')) return(output) } # function for a linear model that only have factors (a.k.a. ANOVAs) lm_outCat <- function(fit_model = fitted_model, out_num = 1){ require(tidyverse) data <- fit_model[["model"]] #leverage leverage <- hatvalues(fit_model) iv_number <- (length(coef(fit_model)) - 1) #minus the intercept cutleverage <- (2*iv_number+2) / nrow(data) message_leverage = paste(c("the cut off for leverage values was ", cutleverage), collapse = '') badleverage <- as.numeric(leverage > cutleverage) out_leverage <- table(badleverage) #cooks cooks <- cooks.distance(fit_model) cutcooks <- 4 / (nrow(data) - iv_number - 1) message_cooks <- paste(c("the cut off for cooks values was ", cutcooks), collapse = '') badcooks <- as.numeric(cooks > cutcooks) out_cooks <- table(badcooks) #total totalout <- badleverage + badcooks total_outliers = table(totalout) #new data noout_data <- subset(data, totalout < out_num) noout_data <- as_tibble(noout_data) excluded_outs <- (NROW(data) - NROW(noout_data)) output = list(total_outliers = total_outliers, #totalout = totalout, message_leverage = message_leverage, message_cooks = message_cooks, out_cooks = out_cooks, out_leverage = out_leverage, noout_data = noout_data) print(paste(c("a total of ", excluded_outs, " outliers should be excluded"), collapse = '')) return(output) } # check the residuals assumptions lm_assump <- function(fit_model = fitted_model){ require(tidyverse) require(ggpubr, warn.conflicts = F) df = data.frame(standardized = rstudent(fit_model), fitted = scale(fit_model$fitted.values)) #normality shap = shapiro.test(fit_model$residuals) p_interpret = ifelse(shap$p.value < .05, "non-normal", "normal") w = round(shap$statistic, digits = 2) p = round(shap$p.value, digits = 3) results_shapiro = paste(c("Shapiro-wilk test suggested that the residuals were ", p_interpret,", w = ", w[[1]], " p = ", p[[1]]), collapse = '') hist_res = ggplot(df, aes(x=standardized)) + geom_density(alpha=.2, fill="lightgray")+ geom_histogram(aes(y=..density..), bins = 30, colour="black", fill="white")+ theme(axis.text.x = element_text(colour="black",size=10), panel.background = element_blank(), axis.line = element_line(colour = "black"), axis.text.y = element_text(colour="black",size=11))+ scale_x_continuous(breaks=c(-3,-2,-1,0,1,2,3))+ ggtitle("Residuals distribution") #linearity qq_res = ggplot(df, aes(sample=standardized))+stat_qq()+ theme(axis.text.x = element_text(colour="black",size=10), panel.background = element_blank(), axis.line = element_line(colour = "black"), axis.text.y = element_text(colour="black",size=11))+ stat_qq_line()+ scale_y_continuous(breaks=c(-3,-2,-1,0,1,2,3))+ scale_x_continuous(breaks=c(-3,-2,-1,0,1,2,3))+ ggtitle("Q-Q plot") #homogeneity and homoscedasticity homo_res = ggplot(df, aes(x = fitted, y = standardized))+ geom_point()+ theme(axis.text.x = element_text(colour="black",size=10), panel.background = element_blank(), axis.line = element_line(colour = "black"), axis.text.y = element_text(colour="black",size=11))+ scale_y_continuous(breaks=c(-3,-2,-1,0,1,2,3))+ scale_x_continuous(breaks=c(-3,-2,-1,0,1,2,3))+ geom_hline(yintercept = 0)+ geom_vline(xintercept = 0)+ ggtitle("Homogeneity and homoscedasticity") all_plots = ggarrange(hist_res, qq_res, homo_res , labels = c("A", "B", "C"), ncol = 2, nrow = 2) output = list('All plots' = all_plots, 'Shapiro results' = results_shapiro) #suppressWarnings() #print(results_shapiro) print(all_plots) return(output) } lm_boot_coef <- function(fitted_model = fitted_model, R = 1000, std_method = "smart"){ if (!requireNamespace("pacman", quietly = TRUE)) { install.packages("pacman") } if (!requireNamespace("tidyverse", quietly = TRUE)) { install.packages("tidyverse") } if (!requireNamespace("easystats", quietly = TRUE)) { install.packages("easystats") } if (!requireNamespace("boot", quietly = TRUE)) { install.packages("boot") } if (!requireNamespace("knitr", quietly = TRUE)) { install.packages("knitr") } if (!requireNamespace("kableExtra", quietly = TRUE)) { install.packages("kableExtra") } if (!requireNamespace("jtools", quietly = TRUE)) { install.packages("jtools") } if (!requireNamespace("car", quietly = TRUE)) { install.packages("car") } pacman::p_load(tidyverse, easystats, boot,knitr,jtools, car) outjtools <- jtools::summ(fitted_model, digits = 3, scale = T) cis <- Confint(Boot(object = fitted_model, R = R, method = c("case", "residual")), level=.95, type="bca") betas <- standardize_parameters(fitted_model, method = std_method) return(cbind(outjtools$coeftable, data.frame(cis), data.frame(betas)) %>% select(Parameter, Estimate, X2.5.., X97.5..,`t val.`, p, Std_Coefficient) %>% rename("b" = "Estimate", "Lower" = "X2.5..", "Upper" = "X97.5..", "t" = "t val.", "Beta" = "Std_Coefficient") %>% mutate(p = case_when(p < .001 ~ "< .001", TRUE ~ as.character(round(p,3)))) %>% mutate(across(where(is.numeric), ~ round(.x, 2))) %>% kable(., align = "c", row.names = F) %>% kableExtra::footnote( alphabet = c(paste("adj R² = ",round(r2(fitted_model)[["R2_adjusted"]],3), "; N = ", NROW(fitted_model[["model"]]))))) }