# First of all, clear workspace rm(list=ls()) # Save graphical parameters op <- par() # Check if AIC file exists aic_file <- "https://raw.githubusercontent.com/BrainMapINB/Puberty/main/AIC/AIC_results.csv" # If you prefer to generate those values follow the steps within this 'if' command # (but be aware that some computations may be take a while) if(0){ # Read phenotypic data info <- read.csv("https://raw.githubusercontent.com/BrainMapINB/Puberty/main/Phenotypic/phenotypic.csv") # Remove those sessions with excesive motion artifact info <- info[which(as.logical(info$QC)),] # Refactorize subjects' ID info$BIDS.ID <- factor(info$BIDS.ID) # Extract time-series files (those with GSR) ts_files <- paste0("https://raw.githubusercontent.com/BrainMapINB/Puberty/main/Brain/GSR/sub-", info$BIDS.ID,"/ses-",info$Session,"/pp150v_wGSR_NIHPD_MNI_P264_ts.txt") # Compute connectivity matrices ConnMx <- sapply(ts_files, function(x){ ts <- read.table(x) cmx <- cor(ts) if(sum(is.na(cmx))>0) cmx[is.na(cmx)] <- 0 return(cmx) } ) # Reshape object nroi <- sqrt(dim(ConnMx)[1]) ConnMx_dim <- c(nroi, nroi, nrow(info)) ConnMx <- array(ConnMx, dim = ConnMx_dim) # Load graph theory formulas source("https://raw.githubusercontent.com/BrainMapINB/Puberty/main/Scripts/Auxiliary/inet_w.R") # Load cost-threshold functions source("https://raw.githubusercontent.com/BrainMapINB/Puberty/main/Scripts/Auxiliary/cost_th.R") source("https://raw.githubusercontent.com/BrainMapINB/Puberty/main/Scripts/Auxiliary/cost_th_ConnMx.R") # Load 'gamm4' package for GAMM analyses if(!require(gamm4)) install.packages("gamm4"); library(gamm4) # Load 'psych' package for Fisher r-to-z transformation if(!require(psych)) install.packages("psych"); library(psych) # Graph names grp_var <- c("D_net","C_net","L_net","E_net") # Explore a set of thresholds cost <- seq(from = 0.01, to = 0.48, by = 0.01) # Store results into empty matrix DF <- matrix(data = 0, nrow = length(cost), ncol = 139) DF[,1] <- cost # Set column names colnames(DF) <- c("cost","thr_avg","thr_sd", "d_GAMMsAge_AIC","d_GAMMsAge_sex_b","d_GAMMsAge_sex_t","d_GAMMsAge_sex_p", "d_GAMMsAge_edf","d_GAMMsAge_f","d_GAMMsAge_p", "d_GAMMsAgeSex_AIC","d_GAMMsAgeSex_sex_b","d_GAMMsAgeSex_sex_t","d_GAMMsAgeSex_sex_p", "d_GAMMsAgeSexF_edf","d_GAMMsAgeSexF_f","d_GAMMsAgeSexF_p", "d_GAMMsAgeSexM_edf","d_GAMMsAgeSexM_f","d_GAMMsAgeSexM_p", "d_GAMMsPDS_AIC","d_GAMMsPDS_sex_b","d_GAMMsPDS_sex_t","d_GAMMsPDS_sex_p", "d_GAMMsPDS_edf","d_GAMMsPDS_f","d_GAMMsPDS_p", "d_GAMMsPDSSex_AIC","d_GAMMsPDSSex_sex_b","d_GAMMsPDSSex_sex_t","d_GAMMsPDSSex_sex_p", "d_GAMMsPDSSexF_edf","d_GAMMsPDSSexF_f","d_GAMMsPDSSexF_p", "d_GAMMsPDSSexM_edf","d_GAMMsPDSSexM_f","d_GAMMsPDSSexM_p", "c_GAMMsAge_AIC","c_GAMMsAge_sex_b","c_GAMMsAge_sex_t","c_GAMMsAge_sex_p", "c_GAMMsAge_edf","c_GAMMsAge_f","c_GAMMsAge_p", "c_GAMMsAgeSex_AIC","c_GAMMsAgeSex_sex_b","c_GAMMsAgeSex_sex_t","c_GAMMsAgeSex_sex_p", "c_GAMMsAgeSexF_edf","c_GAMMsAgeSexF_f","c_GAMMsAgeSexF_p", "c_GAMMsAgeSexM_edf","c_GAMMsAgeSexM_f","c_GAMMsAgeSexM_p", "c_GAMMsPDS_AIC","c_GAMMsPDS_sex_b","c_GAMMsPDS_sex_t","c_GAMMsPDS_sex_p", "c_GAMMsPDS_edf","c_GAMMsPDS_f","c_GAMMsPDS_p", "c_GAMMsPDSSex_AIC","c_GAMMsPDSSex_sex_b","c_GAMMsPDSSex_sex_t","c_GAMMsPDSSex_sex_p", "c_GAMMsPDSSexF_edf","c_GAMMsPDSSexF_f","c_GAMMsPDSSexF_p", "c_GAMMsPDSSexM_edf","c_GAMMsPDSSexM_f","c_GAMMsPDSSexM_p", "l_GAMMsAge_AIC","l_GAMMsAge_sex_b","l_GAMMsAge_sex_t","l_GAMMsAge_sex_p", "l_GAMMsAge_edf","l_GAMMsAge_f","l_GAMMsAge_p", "l_GAMMsAgeSex_AIC","l_GAMMsAgeSex_sex_b","l_GAMMsAgeSex_sex_t","l_GAMMsAgeSex_sex_p", "l_GAMMsAgeSexF_edf","l_GAMMsAgeSexF_f","l_GAMMsAgeSexF_p", "l_GAMMsAgeSexM_edf","l_GAMMsAgeSexM_f","l_GAMMsAgeSexM_p", "l_GAMMsPDS_AIC","l_GAMMsPDS_sex_b","l_GAMMsPDS_sex_t","l_GAMMsPDS_sex_p", "l_GAMMsPDS_edf","l_GAMMsPDS_f","l_GAMMsPDS_p", "l_GAMMsPDSSex_AIC","l_GAMMsPDSSex_sex_b","l_GAMMsPDSSex_sex_t","l_GAMMsPDSSex_sex_p", "l_GAMMsPDSSexF_edf","l_GAMMsPDSSexF_f","l_GAMMsPDSSexF_p", "l_GAMMsPDSSexM_edf","l_GAMMsPDSSexM_f","l_GAMMsPDSSexM_p", "e_GAMMsAge_AIC","e_GAMMsAge_sex_b","e_GAMMsAge_sex_t","e_GAMMsAge_sex_p", "e_GAMMsAge_edf","e_GAMMsAge_f","e_GAMMsAge_p", "e_GAMMsAgeSex_AIC","e_GAMMsAgeSex_sex_b","e_GAMMsAgeSex_sex_t","e_GAMMsAgeSex_sex_p", "e_GAMMsAgeSexF_edf","e_GAMMsAgeSexF_f","e_GAMMsAgeSexF_p", "e_GAMMsAgeSexM_edf","e_GAMMsAgeSexM_f","e_GAMMsAgeSexM_p", "e_GAMMsPDS_AIC","e_GAMMsPDS_sex_b","e_GAMMsPDS_sex_t","e_GAMMsPDS_sex_p", "e_GAMMsPDS_edf","e_GAMMsPDS_f","e_GAMMsPDS_p", "e_GAMMsPDSSex_AIC","e_GAMMsPDSSex_sex_b","e_GAMMsPDSSex_sex_t","e_GAMMsPDSSex_sex_p", "e_GAMMsPDSSexF_edf","e_GAMMsPDSSexF_f","e_GAMMsPDSSexF_p", "e_GAMMsPDSSexM_edf","e_GAMMsPDSSexM_f","e_GAMMsPDSSexM_p" ) for(hh in 1:length(cost)){ # Print progress print(paste0("Cost ",cost[hh])) # Apply cost threshold thr <- apply(ConnMx,3,function(x) cost_th(x,cost[hh])) # Save threshold average DF[hh,2] <- mean(thr); DF[hh,3] <- sd(thr) # Apply thresholds to Connectivity Matrix ConnMx_th <- apply(ConnMx,3,function(x) cost_th_ConnMx(x,cost[hh])) ConnMx_th <- array(ConnMx_th,dim=ConnMx_dim) # Apply complex networks measures. # Substract diagonal for(ii in 1:ConnMx_dim[3]) diag(ConnMx_th[,,ii]) <- 0 # Fisher's z transformation ConnMx_th <- fisherz(ConnMx_th) if(max(ConnMx_th) == Inf) ConnMx_th[ConnMx_th==Inf] <- max(ConnMx_th[ConnMx_th!=Inf]) # Weighted-Degree w_deg <- apply(ConnMx_th,3:2,sum) # Clustering Coefficiente (per node) - weighted tic <- Sys.time() w_CC <- t(sapply(1:ConnMx_dim[3], function(x) iCC_w(ConnMx_th[,,x]))) if(sum(is.na(w_CC))>0) w_CC[is.na(w_CC)] <- 0 toc <- Sys.time() print("Clustering:"); print(toc-tic) # Average distances (per node) - weighted tic <- Sys.time() w_L <- t(sapply(1:ConnMx_dim[3], function(x) iL_w(ConnMx_th[,,x]))) toc <- Sys.time() print("Path-length:"); print(toc-tic) # Inverse average distances (per node) - weighted tic <- Sys.time() w_E <- t(sapply(1:ConnMx_dim[3], function(x) iEff_w(ConnMx_th[,,x]))) toc <- Sys.time() print("Efficiency:"); print(toc-tic) # Whole-brain graph-theory measures along the sample D_net <- rowMeans(w_deg) C_net <- rowMeans(w_CC) L_net <- rowMeans(w_L) E_net <- rowMeans(w_E) # Apply linear models at the group level info$D_net <- D_net info$C_net <- C_net info$L_net <- L_net info$E_net <- E_net # Test different models for(kk in 1:length(grp_var)){ # GAMM model (age spline) grp_form <- as.formula(paste0(grp_var[kk], " ~ s(Age, k=4) + Sex + FDRMS + Coil")) fitGAMM <- tryCatch(gamm4(grp_form, random = ~ (1|BIDS.ID), data = info, REML = F), error=function(e) NA) # GAMM model (age*sex splines) grp_form <- as.formula(paste0(grp_var[kk], " ~ s(Age, k=4, by=Sex) + Sex + FDRMS + Coil")) fitGAMMi <- tryCatch(gamm4(grp_form, random = ~ (1|BIDS.ID), data = info, REML = F), error=function(e) NA) # GAMM model (PDS spline) grp_form <- as.formula(paste0(grp_var[kk], " ~ s(PDSe, k=4) + Sex + FDRMS + Coil")) fitPDS <- tryCatch(gamm4(grp_form, random = ~ (1|BIDS.ID), data = info, REML = F), error=function(e) NA) # GAMM model (PDS*sex splines) grp_form <- as.formula(paste0(grp_var[kk], " ~ s(PDSe, k=4, by=Sex) + Sex + FDRMS + Coil")) fitPDSi <- tryCatch(gamm4(grp_form, random = ~ (1|BIDS.ID), data = info, REML = F), error=function(e) NA)# Store statistics # GAMM-Age DF[hh,4+(kk-1)*34] <- tryCatch(AIC(fitGAMM$mer), error=function(e) NA) DF[hh,5:7+(kk-1)*34] <- tryCatch(summary(fitGAMM$gam)$p.table[2,c(1,3,4)], error=function(e) NA) DF[hh,8:10+(kk-1)*34] <- tryCatch(summary(fitGAMM$gam)$s.table[,c(1,3,4)], error=function(e) NA) # GAMM-AgeBySex DF[hh,11+(kk-1)*34] <- tryCatch(AIC(fitGAMMi$mer), error=function(e) NA) DF[hh,12:14+(kk-1)*34] <- tryCatch(summary(fitGAMMi$gam)$p.table[2,c(1,3,4)], error=function(e) NA) DF[hh,15:20+(kk-1)*34] <- tryCatch(c(t(summary(fitGAMMi$gam)$s.table[,c(1,3,4)])), error=function(e) NA) # GAMM-PDS DF[hh,21+(kk-1)*34] <- tryCatch(AIC(fitPDS$mer), error=function(e) NA) DF[hh,22:24+(kk-1)*34] <- tryCatch(summary(fitPDS$gam)$p.table[2,c(1,3,4)], error=function(e) NA) DF[hh,25:27+(kk-1)*34] <- tryCatch(summary(fitPDS$gam)$s.table[,c(1,3,4)], error=function(e) NA) # GAMM-PDSbySex DF[hh,28+(kk-1)*34] <- tryCatch(AIC(fitPDSi$mer), error=function(e) NA) DF[hh,29:31+(kk-1)*34] <- tryCatch(summary(fitPDSi$gam)$p.table[2,c(1,3,4)], error=function(e) NA) DF[hh,32:37+(kk-1)*34] <- tryCatch(c(t(summary(fitPDSi$gam)$s.table[,c(1,3,4)])), error=function(e) NA) }# for(kk in 1:length(grp_var)) }# for(hh in 1:length(cost)) # Save results outname <- "AIC_results.csv" write.csv(DF, outname, quote = F, row.names = F) } # Read AIC results all_fits <- read.csv(aic_file) # Plot results # Load 'ggplot2' package if(!require(ggplot2)) install.packages("ggplot2"); library(ggplot2) # Model labels mod_labs <- c("Age","Age-Sex","PDS","PDS-Sex") if(!all(rowSums(abs(all_fits[,-(1:3)]))!=0)) all_fits <- all_fits[which(rowSums(abs(all_fits[,-(1:3)]))!=0),] # Compute threshold-based FDR all_fitsFDR <- all_fits all_fitsFDR[,grep("_p",names(all_fits))] <- apply(all_fits[,grep("_p",names(all_fits))], 2, function(x) p.adjust(x, method = "fdr")) # Set initial variables grph_names <- c("d_","c_","l_","e_") best_aic <- matrix(0, nrow = nrow(all_fits), ncol = length(grph_names)) colnames(best_aic) <- paste0(grph_names,"bestAIC") best_sigFDR <- best_sig <- best_aic # Store best models for(ee in 1:length(grph_names)){ # Extract columns aic_colidx <- intersect(grep(paste0(grph_names[ee],"G"), names(all_fits)),grep("AIC", names(all_fits))) sig_colidx <- intersect(grep(paste0(grph_names[ee],"G"), names(all_fits)),grep("_p", names(all_fits))) sig_colidx <- sig_colidx[grep("_sex_", names(all_fits)[sig_colidx], invert = T)] # Get best fit best_aic[,ee] <- sapply(1:nrow(all_fits), function(x){ y1 <- which.min(all_fits[x,aic_colidx]) # Get best AIC position if(length(y1) == 0) y1 <- NA return(y1) } ) # Get best spline best_sig[,ee] <- sapply(1:nrow(all_fits), function(x){ y1 <- which.min(all_fits[x,aic_colidx]) # Get best AIC position if(length(y1) == 0) y2 <- NA else{ if(y1 == 1) y2 <- all_fits[x,sig_colidx[1]] if(y1 == 2) y2 <- min(all_fits[x,sig_colidx[2:3]]) if(y1 == 3) y2 <- all_fits[x,sig_colidx[4]] if(y1 == 4) y2 <- min(all_fits[x,sig_colidx[5:6]]) } return(y2) } ) # Get best spline (FDR) best_sigFDR[,ee] <- sapply(1:nrow(all_fitsFDR), function(x){ y1 <- which.min(all_fitsFDR[x,aic_colidx]) # Get best AIC position if(length(y1) == 0) y2 <- NA else{ if(y1 == 1) y2 <- all_fitsFDR[x,sig_colidx[1]] if(y1 == 2) y2 <- min(all_fitsFDR[x,sig_colidx[2:3]]) if(y1 == 3) y2 <- all_fitsFDR[x,sig_colidx[4]] if(y1 == 4) y2 <- min(all_fitsFDR[x,sig_colidx[5:6]]) } return(y2) } ) } # Plot best results # Reshape data.frame rsh_df <- as.data.frame(rep(all_fits$cost,length(grph_names))) # cost rsh_df <- cbind(rsh_df,rep(1:length(grph_names), each = nrow(all_fits))) # varnames rsh_df <- cbind(rsh_df,c(best_aic)) # best AIC rsh_df <- cbind(rsh_df,c(best_sig)) # best p-value rsh_df <- cbind(rsh_df,c(best_sigFDR)) # best pFDR rsh_df <- cbind(rsh_df,rep(1,nrow(rsh_df))) # symbol size rsh_df <- cbind(rsh_df,rep(1,nrow(rsh_df))) # symbol alpha names(rsh_df) <- c("cost","var","AIC","p","pFDR","size","alpha") # Set symbol sizes rsh_df$size[rsh_df$p < 0.05] <- 1.5 rsh_df$size[rsh_df$pFDR < 0.05] <- 3 # Set symbol alphas rsh_df$alpha[rsh_df$pFDR > 0.05] <- 0.5 rsh_df$alpha[rsh_df$p > 0.05] <- 0.1 rsh_df$var <- factor(rsh_df$var) levels(rsh_df$var) <- c("Degree","Clustering","Path-Lengh","Efficiency") # Relevel best-fit labels rsh_df$AIC <- factor(rsh_df$AIC) levels(rsh_df$AIC) <- mod_labs[as.integer(levels(rsh_df$AIC))] # Plot best AIC ggAIC <- ggplot(data=rsh_df, aes(x=var, y=cost)) + stat_identity(aes(color=AIC, size=size)) + labs(color = "Model", size = "Significance") + ylab("Connectivity density (proportion)") + xlab("") + ggtitle("Lower Akaike Information Criterion (AIC)") + coord_flip() + scale_color_brewer(palette = "Dark2") + theme_light() show(ggAIC) # Significance: p > 0.05 (1); p < 0.05 (1.5); pFDR < 0.05 (3) # Save plot outfile <- "Lower_AIC.pdf" #ggsave(outfile, ggAIC, device = "pdf")