--- title: "Presenting Regression Analyses in R: Part 2" author: "Kevin Donovan" date: "`r format(Sys.time(), '%B %d, %Y')`" output: html_document --- ```{r setup, include=FALSE, echo=FALSE} knitr::opts_chunk$set(echo=FALSE, message = FALSE, warning = FALSE, fig.width = 8, fig.height = 4) library(r2glmm) library(effsize) library(ggfortify) library(esvis) library(ppcor) library(tidyverse) library(shiny) library(rmarkdown) library(broom) library(gtsummary) library(flextable) library(ggpubr) library(nlme) library(lme4) library(broom.mixed) library(GGally) library(ggcorrplot) library(lsmeans) library(sjPlot) library(r2glmm) library(effsize) library(ggfortify) library(esvis) library(ppcor) ``` ```{r} brain_data <- read_csv("../Data/IBIS_brain_data_ex.csv") %>% mutate(V24_ASD_DX_edit = factor(ifelse(grepl("YES", V24_ASD_DX), "Positive", ifelse(grepl("NO", V24_ASD_DX), "Negative", NA)))) behav_data <- read_csv("../Data/Cross-sec_full.csv", na=c(".", "", " ")) names(behav_data) <- gsub(" |,",".",names(behav_data)) mixed_model_data <- behav_data %>% select(Identifiers, GROUP, Gender, V36.mullen.composite_standard_score:V12.mullen.composite_standard_score) vars_to_convert <- names(mixed_model_data)[c(-1,-2,-3)] mixed_model_data <- mixed_model_data %>% gather(variable, var_value, vars_to_convert) %>% separate(variable,c("Visit","Variable"),sep=3) %>% spread(key=Variable, value=var_value) %>% plyr::rename(c(".mullen.composite_standard_score"="Mullen_Composite_Score")) %>% mutate(ASD_Diag = factor(ifelse(grepl("ASD", GROUP), "ASD_Neg", "ASD_Pos")), Visit=factor(Visit), Visit_Num=as.numeric(Visit)-1, Mullen_Composite_Score=as.numeric(Mullen_Composite_Score)) %>% arrange(Identifiers, Visit) ``` # Correlation analyses Correlation heat map ```{r fig.width = 7, fig.height = 6} brain_data_v24 <- brain_data %>% select(names(brain_data)[grepl("V24", names(brain_data))]) %>% select(EACSF_V24:RightAmygdala_V24 ) ggcorrplot(cor(x=brain_data_v24, method="pearson", use="pairwise.complete.obs"), hc.order = TRUE, type = "lower", lab = TRUE, outline.col = "white") ``` Grid of heat maps ```{r fig.width = 10, fig.height = 8} brain_data_v12 <- brain_data %>% select(names(brain_data)[grepl("V12", names(brain_data))]) %>% select(EACSF_V12:RightAmygdala_V12 ) brain_data_v24 <- brain_data %>% select(names(brain_data)[grepl("V24", names(brain_data))]) %>% select(EACSF_V24:RightAmygdala_V24 ) p.mat_v12 <- cor_pmat(brain_data_v12) p.mat_v24 <- cor_pmat(brain_data_v24) heatmaps <- list(ggcorrplot(cor(x=brain_data_v12, method="pearson", use="pairwise.complete.obs"), hc.order = TRUE, type = "lower", lab = TRUE, outline.col = "white", p.mat = p.mat_v12)+ labs(title = "12 months") , ggcorrplot(cor(x=brain_data_v24, method="pearson", use="pairwise.complete.obs"), hc.order = TRUE, type = "lower", lab = TRUE, outline.col = "white", p.mat = p.mat_v24)+ labs(title = "24 months")) ggarrange(plotlist=heatmaps, common.legend = TRUE, legend = "bottom") ``` # Summary statistics - Can create easily formatted summary stats tables **in code** and add effect sizes - Can use any effect size formula of interest ```{r fig.width = 10, fig.height = 8} brain_data_v24 <- brain_data %>% select(c(names(brain_data)[grepl("V24", names(brain_data))], "RiskGroup")) %>% select(EACSF_V24:RightAmygdala_V24, RiskGroup) %>% filter(RiskGroup%in%c("HR-ASD", "HR-Neg")) # Create effect size function using Cohen's d my_EStest <- function(data, variable, by, ...) { d <- effsize::cohen.d(data[[variable]] ~ as.factor(data[[by]]), conf.level=.95, pooled=TRUE, paired=FALSE, hedges.correction=TRUE) # Formatting statistic with CI est <- round(d$estimate, 2) ci <- round(d$conf.int, 2) %>% paste(collapse = ", ") # returning estimate with CI together str_glue("{est} ({ci})") } tbl_summary(data=brain_data_v24, by=RiskGroup, missing_text = "Missing", statistic = list(all_continuous() ~ "{mean} ({sd})")) %>% add_p(list(all_continuous() ~ "aov", all_categorical() ~ "chisq")) %>% add_n() %>% add_stat( fns = everything() ~ my_EStest, fmt_fun = NULL, header = "**ES (95% CI)**" ) %>% modify_footnote(add_stat_1 ~ "Cohen's D (95% CI)") %>% as_flex_table() %>% bold(bold = TRUE, part = "header") %>% autofit() ``` # Regression summaries ## Linear Models ### F and t-test ```{r} # Pairwise differences: Cohen's D cohen_d <- coh_d(V24_MSEL_EL_ae~RiskGroup, data=brain_data) # Now construct Cohen's F^2 lm_fit <- lm(V24_MSEL_EL_ae~RiskGroup, data=brain_data) # 1. By hand sum_of_sq_table <- anova(lm_fit) eta_2 <- sum_of_sq_table$`Sum Sq`[1]/(sum(sum_of_sq_table$`Sum Sq`)) eta_2/(1-eta_2) # 2. Using package effectsize::cohens_f_squared(lm_fit, ci=0.95, partial = FALSE) ``` ### Effect sizes ```{r} lm_fit <- lm(V24_MSEL_EL_ae~RiskGroup+RightAmygdala_V24, data=brain_data) lm_fit_tidy <- tidy(lm_fit) # Least square means ls_means_df <- lsmeans(lm_fit, "RiskGroup", at = list(RightAmygdala_V24=mean(brain_data$RightAmygdala_V24, na.rm=TRUE))) %>% data.frame() # Now construct partial Cohen's F^2 # 1. By hand sum_of_sq_table <- anova(lm_fit) partial_eta_2 <- sum_of_sq_table$`Sum Sq`[1]/(sum(sum_of_sq_table$`Sum Sq`)) partial_eta_2/(1-partial_eta_2) # 2. Using package effectsize::cohens_f_squared(lm_fit, ci=0.95) # Now try adjusted Cohen's D # Source: Lipsey and Wilson's (2001) book, Practical Meta-analysis, Appendix B # From https://stats.stackexchange.com/questions/348502/cohens-d-from-a-linear-regression-model brain_data_noLR <- brain_data %>% filter(RiskGroup!="LR-Neg") lm_fit <- lm(V24_MSEL_EL_ae~RiskGroup+RightAmygdala_V24, data=brain_data_noLR) N <- dim(brain_data_noLR)[1] n1 <- sum(brain_data_noLR$RiskGroup=="HR-ASD") n2 <- N - n1 group_estimate <- summary(lm_fit)$coef["RiskGroupHR-Neg","Estimate"] model_df <- model.frame(V24_MSEL_EL_ae~RiskGroup+RightAmygdala_V24, brain_data_noLR) pooled_se_lm <- (var(model_df[,1])*(N-1)-((group_estimate)^2)*(n1*n2/(n1+n2)))/N group_estimate/pooled_se_lm ``` ### Semi-partial correlation ```{r} brain_data_v24 <- brain_data %>% select(c(names(brain_data)[grepl("V24", names(brain_data))], "RiskGroup")) %>% select(EACSF_V24:RightAmygdala_V24, V24_MSEL_EL_ae, RiskGroup) lm_fit <- lm(V24_MSEL_EL_ae~RiskGroup+RightAmygdala_V24+TBV_V24, data=brain_data_v24) summary(lm_fit) spcors_lm <- spcor(brain_data_v24 %>% dplyr::select(V24_MSEL_EL_ae, RiskGroup, RightAmygdala_V24, TBV_V24) %>% mutate(RiskGroup=as.numeric(factor(RiskGroup))) %>% drop_na()) # NOTE: can't have missing values and all variables must be numeric # crudely converted diagnosis to numeric, should manually create dummy variables # instead spcors_lm_est <- spcors_lm$estimate spcors_p_val <- spcors_lm$p.value ggcorrplot(spcors_lm_est, hc.order = TRUE, type = "full", lab = TRUE, outline.col = "white", p.mat = spcors_p_val) ``` ## Mixed Models ```{r} mixed_fit <- lme(Mullen_Composite_Score~Visit_Num + ASD_Diag, data=mixed_model_data, random = ~1|Identifiers, na.action = na.omit) # Can tidy using broom.mixed package fixed_effects <- tidy(mixed_fit) %>% filter(effect=="fixed") %>% select(-effect, -group) # Semi partial R^2 from r2glmm package # Best measure in my opinion, accurate for any mixed model/generalized mixed model semi_partial_r2_mixed <- r2beta(mixed_fit) plot(semi_partial_r2_mixed) # Adjusted Cohen's D: AD HOC, not the best method in general (only valid in specific samples) # Source: https://www.researchgate.net/publication/264627622_Statistical_Power_and_Optimal_Design_in_Experiments_in_Which_Samples_of_Participants_Respond_to_Samples_of_Stimuli random_effect_var <- tidy(mixed_fit) se_pooled <- sqrt((random_effect_var$estimate[4])^2+(random_effect_var$estimate[5])^2) adjUsted_cohensd_mixed <- fixed_effects$estimate[3]/se_pooled ```