--- title: "10.GSVA" output: html_document date: "2024-11-21" --- # Load necessary libraries ```{r} library(DESeq2) library(GSVA) library(limma) library(ComplexHeatmap) library(EnhancedVolcano) library(GSEABase) library(circlize) # Function to read GMT file readGMT <- function(file) { getGmt(file) } ``` ```{r} # Normalizing the count data DESeq <- readRDS("/Volumes/Fengshuo_14T/Lab/Yunfeng/cancer_comparsions/Zenodo/data/DESeq2_obj_archetype_comparsion/Mono.DESeq.rds") DESeq <- estimateSizeFactors(DESeq) norm_counts <- counts(DESeq, normalized = TRUE) # Extract the normalized expression matrix exprs_data <- assay(DESeq) ``` ```{r} # Step 2: Run GSVA gene_sets <- readGMT("./data/msigdb_v2023.2.Hs_GMTs/msigdb.v2023.2.Hs.symbols.gmt") # specify the path to your .gmt file #gene_sets <- readGMT("~/OneDrive - Baylor College of Medicine/Lab/Zhan/202308_IIA_combine_ic/data/MsigDB/msigdb_v2023.2.Hs_GMTs/c5.all.v2023.2.Hs.symbols.gmt") # specify the path to your .gmt file #gene_sets <- readGMT("~/OneDrive - Baylor College of Medicine/Lab/Zhan/202308_IIA_combine_ic/data/MsigDB/msigdb_v2023.2.Hs_GMTs/C5_immune_bone.gmt") # specify the path to your .gmt file gsva_res <- gsva(exprs_data, gene_sets, method="gsva", min.sz=10, max.sz=500, verbose=TRUE) saveRDS(gsva_res, './outs/GSVA_tex/gsva_res.all.rds') ``` #Step 1: Set Up the Design Matrix with All Groups #First, you need to create a design matrix that includes all your groups. Instead of comparing just two groups, we'll set up the design matrix to model each group separately. ```{r} # Set up the design matrix with all groups (without an intercept) design <- model.matrix(~ 0 + group.id, data = colData(DESeq)) colnames(design) <- levels(colData(DESeq)$group.id) ``` #Step 2: Fit the Linear Model #Use the design matrix to fit the linear model with your GSVA results. ```{r} # Fit the linear model fit <- lmFit(gsva_res, design) ``` Step 3: Define Contrasts for Multiple Comparisons Now, you can define contrasts to compare groups. You can perform: Pairwise comparisons between all groups. Comparison of each group against the average of the others. An overall test to identify any gene sets that differ among the groups. ```{r, Overall Test Using F-Statistics} #For an overall test to see if there are any differences among the groups: # Apply empirical Bayes moderation fit2 <- eBayes(fit) # Get the top gene sets sorted by F-statistic top_gene_sets <- topTable(fit2, coef = NULL, number = Inf, sort.by = "F") # View the top gene sets head(top_gene_sets) length(rownames(top_gene_sets)) saveRDS(top_gene_sets, './outs/GSVA_tex/top_gene_sets.all.rds') ``` ```{r} library(ComplexHeatmap) library(circlize) # Create a sample annotation data frame sample_annotation <- data.frame( group.id = colData(DESeq)$group.id ) rownames(sample_annotation) <- colnames(gsva_res) # Define colors for each group group_levels <- unique(sample_annotation$group.id) group_colors <- setNames( colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)), group_levels ) # Create a HeatmapAnnotation ha <- HeatmapAnnotation( df = sample_annotation, col = list(group.id = group_colors) ) # Select top gene sets for visualization top_gene_set_names <- rownames(top_gene_sets)[1:20] # Top 20 gene sets # Create the heatmap heatmap <- Heatmap( gsva_res[top_gene_set_names, ], name = "GSVA Score", top_annotation = ha, show_column_names = T, cluster_rows = TRUE, cluster_columns = T, column_title = "Samples", row_title = "Gene Sets" ) # Draw the heatmap draw(heatmap) length(rownames(top_gene_sets)) ``` ```{r} #filtering the pathways: # Assuming 'top_gene_sets' is your results DataFrame from the limma analysis # Filter pathways with adjusted p-value less than 0.05 significant_gene_sets <- top_gene_sets[top_gene_sets$adj.P.Val < 0.05, ] # Check if 'MφOC' column exists in your DataFrame # If the column name is different, replace 'MφOC' with the correct column name # Sort the significant pathways by the 'MφOC' column in descending order ranked_gene_sets <- significant_gene_sets[order(-significant_gene_sets$`TregTex`), ] # View the first few rows of the ranked gene sets head(ranked_gene_sets) length(rownames(ranked_gene_sets)) write.csv(ranked_gene_sets, './outs/GSVA_by_celltype_summarized_2/Tex_ranked_gene_sets.csv') saveRDS(ranked_gene_sets, './outs/GSVA_by_celltype_summarized_2/Tex_ranked_gene_sets.rds') ``` ```{r} # Select Gene Sets for Plotting # Specify the number of gene sets to plot num_gene_sets <- 200 # Get the names of the top gene sets gene_set_names <- rownames(ranked_gene_sets)[1:num_gene_sets] # Ensure that you don't exceed the number of available gene sets gene_set_names <- gene_set_names[!is.na(gene_set_names)] library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Create the sample annotation data frame sample_annotation <- data.frame( group.id = colData(DESeq)$group.id ) rownames(sample_annotation) <- colnames(gsva_res) # Define colors for each group group_levels <- unique(sample_annotation$group.id) group_colors <- setNames( colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)), group_levels ) # Create a HeatmapAnnotation ha <- HeatmapAnnotation( df = sample_annotation, col = list(group.id = group_colors) ) # Subset the gsva_res matrix to include only the selected gene sets gsva_res_sub <- gsva_res[gene_set_names, ] # Define color function based on data range min_score <- min(gsva_res_sub) max_score <- max(gsva_res_sub) color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red")) # Create the heatmap heatmap <- Heatmap( gsva_res_sub, name = "GSVA Score", top_annotation = ha, show_column_names = FALSE, cluster_rows = TRUE, cluster_columns = TRUE, column_title = "Samples", row_title = "Gene Sets", col = color_fun, row_names_gp = gpar(fontsize = 6) # Adjust font size as needed ) # Draw the heatmap draw(heatmap) ``` ```{r} # Get the names of the gene sets to plot gene_set_names <- rownames(ranked_gene_sets) library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Create the sample annotation data frame sample_annotation <- data.frame( group.id = colData(DESeq)$group.id ) rownames(sample_annotation) <- colnames(gsva_res) # Define colors for each group group_levels <- unique(sample_annotation$group.id) group_colors <- setNames( colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)), group_levels ) # Create a HeatmapAnnotation ha <- HeatmapAnnotation( df = sample_annotation, col = list(group.id = group_colors) ) # Subset the gsva_res matrix to include only the selected gene sets gsva_res_sub <- gsva_res[gene_set_names, ] # Ensure the gene sets are ordered as in 'ranked_gene_sets' gsva_res_sub <- gsva_res_sub[gene_set_names, ] # Define color function based on data range min_score <- min(gsva_res_sub) max_score <- max(gsva_res_sub) color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red")) # Adjust figure size by setting width and height heatmap <- Heatmap( gsva_res_sub, name = "GSVA Score", top_annotation = ha, show_column_names = FALSE, cluster_rows = TRUE, cluster_columns = TRUE, column_title = "Samples", row_title = "Gene Sets", col = color_fun, row_names_gp = gpar(fontsize = 10) ) # Draw the heatmap draw(heatmap) ``` ##joint the celltypes ```{r} #Step 1: Load and Prepare Data # Load your ranked_gene_sets data frames # Replace these with your actual data frames ranked_gene_sets_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/top_gene_sets.rds") length(rownames(ranked_gene_sets_macrophage)) ranked_gene_sets_treg <- top_gene_sets # For Treg cells length(rownames(ranked_gene_sets_treg)) # Find common gene set names common_gene_sets <- intersect(rownames(ranked_gene_sets_macrophage), rownames(ranked_gene_sets_treg)) length(common_gene_sets) # Decide how many gene sets to plot num_gene_sets <- 50 # Adjust as needed # From the common gene sets, select the top gene sets # Here, we'll use the order from the macrophage dataset gene_set_names <- rownames(ranked_gene_sets_macrophage)[rownames(ranked_gene_sets_macrophage) %in% common_gene_sets][1:num_gene_sets] # Ensure that you don't exceed the number of available gene sets gene_set_names <- gene_set_names[!is.na(gene_set_names)] #Step 2: Extract and Combine GSVA Results #Subset the GSVA results to include only the selected gene sets: # Subset the GSVA results for the selected gene sets gsva_res_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5.rds") gsva_res_macrophage_sub <- gsva_res_macrophage[gene_set_names, ] gsva_res_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5.rds") gsva_res_treg_sub <- gsva_res_treg[gene_set_names, ] #Adjust sample names to include cell type information: # Append cell type to sample names for uniqueness colnames(gsva_res_macrophage_sub) <- paste0(colnames(gsva_res_macrophage_sub), "_Macrophage") colnames(gsva_res_treg_sub) <- paste0(colnames(gsva_res_treg_sub), "_Treg") #Combine the GSVA results: # Combine the GSVA results into one matrix gsva_res_combined <- cbind(gsva_res_macrophage_sub, gsva_res_treg_sub) #Step 3: Create Sample Annotations #Create sample annotations for both cell types: # For macrophage samples DESeq_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds") sample_annotation_macrophage <- data.frame( group.id = colData(DESeq_macrophage)$group.id, cell.type = "Macrophage" ) rownames(sample_annotation_macrophage) <- paste0(colnames(gsva_res_macrophage_sub), "_Macrophage") # For Treg samples DESeq_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds") sample_annotation_treg <- data.frame( group.id = colData(DESeq_treg)$group.id, cell.type = "Treg" ) rownames(sample_annotation_treg) <- paste0(colnames(gsva_res_treg_sub), "_Treg") # Combine the sample annotations sample_annotation_combined <- rbind(sample_annotation_macrophage, sample_annotation_treg) #Ensure that the row names of sample_annotation_combined match the column names of gsva_res_combined: # Check alignment all(rownames(sample_annotation_combined) == colnames(gsva_res_combined)) # If FALSE, ensure that the sample names are correctly matched #Step 4: Plot the Heatmap library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Define colors for each group group_levels <- unique(sample_annotation_combined$group.id) group_colors <- setNames( colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)), group_levels ) # Define colors for cell types cell_type_levels <- unique(sample_annotation_combined$cell.type) cell_type_colors <- setNames( colorRampPalette(brewer.pal(8, "Pastel1"))(length(cell_type_levels)), cell_type_levels ) # Create the HeatmapAnnotation ha <- HeatmapAnnotation( df = sample_annotation_combined, col = list( group.id = group_colors, cell.type = cell_type_colors ), annotation_legend_param = list( group.id = list(title = "Group ID"), cell.type = list(title = "Cell Type") ) ) # Define color function based on data range min_score <- min(gsva_res_combined) max_score <- max(gsva_res_combined) color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red")) # Create the heatmap heatmap <- Heatmap( gsva_res_combined, name = "GSVA Score", top_annotation = ha, show_column_names = FALSE, cluster_rows = TRUE, cluster_columns = TRUE, column_title = "Samples", row_title = "Gene Sets", col = color_fun, row_names_gp = gpar(fontsize = 8), # Adjust font size as needed column_split = sample_annotation_combined$cell.type, # Separate columns by cell type column_title_gp = gpar(fontsize = 12, fontface = "bold") ) # Draw the heatmap draw(heatmap) ``` ```{r} # Load necessary libraries library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Step 1: Load and Prepare Data # Load your GSVA results for both cell types gsva_res_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds") gsva_res_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5_immune_bone.rds") # Load DESeq datasets for both cell types DESeq_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds") DESeq_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds") # Process sample names to extract sample IDs # For macrophage samples colnames(gsva_res_macrophage) <- tolower(colnames(gsva_res_macrophage)) colnames(gsva_res_macrophage) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_macrophage)) colnames(gsva_res_macrophage) <- sapply(strsplit(colnames(gsva_res_macrophage), "_"), function(x) tail(x, 1)) colnames(DESeq_macrophage) <- tolower(colnames(DESeq_macrophage)) colnames(DESeq_macrophage) <- gsub("[^a-z0-9_]", "", colnames(DESeq_macrophage)) colData(DESeq_macrophage)$sample_id <- sapply(strsplit(colnames(DESeq_macrophage), "_"), function(x) tail(x, 1)) # For Treg samples colnames(gsva_res_treg) <- tolower(colnames(gsva_res_treg)) colnames(gsva_res_treg) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_treg)) colnames(gsva_res_treg) <- gsub(" ", "_", colnames(gsva_res_treg)) colnames(gsva_res_treg) <- sapply(strsplit(colnames(gsva_res_treg), "_"), function(x) tail(x, 1)) colnames(DESeq_treg) <- tolower(colnames(DESeq_treg)) colnames(DESeq_treg) <- gsub("[^a-z0-9_]", "", colnames(DESeq_treg)) colData(DESeq_treg)$sample_id <- sapply(strsplit(colnames(DESeq_treg), "_"), function(x) tail(x, 1)) # Find common samples among both datasets and DESeq sample IDs common_samples <- Reduce(intersect, list( colnames(gsva_res_macrophage), colnames(gsva_res_treg), colData(DESeq_macrophage)$sample_id, colData(DESeq_treg)$sample_id )) # Subset the GSVA results to include only common samples gsva_res_macrophage_sub <- gsva_res_macrophage[, common_samples] gsva_res_treg_sub <- gsva_res_treg[, common_samples] # Step 2: Select Top 20 Pathways Enriched in Each Cell Type # Get sample IDs for dominant groups moc_samples <- colData(DESeq_macrophage)$sample_id[colData(DESeq_macrophage)$group.id == "MφOC"] moc_samples <- intersect(moc_samples, common_samples) tregtex_samples <- colData(DESeq_treg)$sample_id[colData(DESeq_treg)$group.id == "TregTex"] tregtex_samples <- intersect(tregtex_samples, common_samples) # Compute mean GSVA scores for each pathway mean_gsva_macrophage <- rowMeans(gsva_res_macrophage[, moc_samples, drop = FALSE], na.rm = TRUE) mean_gsva_treg <- rowMeans(gsva_res_treg[, tregtex_samples, drop = FALSE], na.rm = TRUE) # Select top 20 pathways for each cell type top20_macrophage <- names(sort(mean_gsva_macrophage, decreasing = TRUE))[1:10] top20_treg <- names(sort(mean_gsva_treg, decreasing = TRUE))[1:10] # Combine pathways gene_set_names <- unique(c(top20_macrophage, top20_treg)) # Subset GSVA results for selected pathways and common samples gsva_res_macrophage_sub <- gsva_res_macrophage[gene_set_names, common_samples] gsva_res_treg_sub <- gsva_res_treg[gene_set_names, common_samples] # Combine GSVA results into one matrix gsva_res_combined <- rbind( gsva_res_macrophage_sub[top20_macrophage, ], gsva_res_treg_sub[top20_treg, ] ) # Create pathway_cell_type vector pathway_cell_type <- c(rep("Macrophage", length(top20_macrophage)), rep("Treg", length(top20_treg))) # Step 3: Create Sample Annotations # For common samples sample_annotation <- data.frame( group.id = colData(DESeq_macrophage)$group.id[match(common_samples, colData(DESeq_macrophage)$sample_id)], stringsAsFactors = FALSE ) rownames(sample_annotation) <- common_samples # Define colors for group.id group_levels <- unique(sample_annotation$group.id) group_colors <- setNames( colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)), group_levels ) # Create HeatmapAnnotation ha <- HeatmapAnnotation( df = sample_annotation, col = list(group.id = group_colors), annotation_legend_param = list(group.id = list(title = "Group ID")) ) # Define color function based on data range min_score <- min(gsva_res_combined, na.rm = TRUE) max_score <- max(gsva_res_combined, na.rm = TRUE) color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red")) # Step 4: Plot the Heatmap heatmap <- Heatmap( gsva_res_combined, name = "GSVA Score", top_annotation = ha, show_column_names = FALSE, cluster_columns = TRUE, cluster_rows = TRUE, column_title = "Samples", row_title = "Pathways", col = color_fun, row_names_gp = gpar(fontsize = 8), row_split = pathway_cell_type, # Split rows by cell type cluster_column_slices = FALSE, # Cluster columns globally cluster_row_slices = FALSE, # Cluster rows within each cell type row_gap = unit(2, "mm") ) # Draw the heatmap draw(heatmap) ``` ```{r} # Load necessary libraries library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Step 1: Load and Prepare Data # Load your GSVA results for both cell types gsva_res_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds") gsva_res_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds") # Load DESeq datasets for both cell types DESeq_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds") DESeq_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds") # Process sample names to extract sample IDs # For macrophage samples colnames(gsva_res_macrophage) <- tolower(colnames(gsva_res_macrophage)) colnames(gsva_res_macrophage) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_macrophage)) colnames(gsva_res_macrophage) <- sapply(strsplit(colnames(gsva_res_macrophage), "_"), function(x) tail(x, 1)) colnames(DESeq_macrophage) <- tolower(colnames(DESeq_macrophage)) colnames(DESeq_macrophage) <- gsub("[^a-z0-9_]", "", colnames(DESeq_macrophage)) colData(DESeq_macrophage)$sample_id <- sapply(strsplit(colnames(DESeq_macrophage), "_"), function(x) tail(x, 1)) # For Treg samples colnames(gsva_res_treg) <- tolower(colnames(gsva_res_treg)) colnames(gsva_res_treg) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_treg)) colnames(gsva_res_treg) <- gsub(" ", "_", colnames(gsva_res_treg)) colnames(gsva_res_treg) <- sapply(strsplit(colnames(gsva_res_treg), "_"), function(x) tail(x, 1)) colnames(DESeq_treg) <- tolower(colnames(DESeq_treg)) colnames(DESeq_treg) <- gsub("[^a-z0-9_]", "", colnames(DESeq_treg)) colData(DESeq_treg)$sample_id <- sapply(strsplit(colnames(DESeq_treg), "_"), function(x) tail(x, 1)) # Find common samples among both datasets and DESeq sample IDs common_samples <- Reduce(intersect, list( colnames(gsva_res_macrophage), colnames(gsva_res_treg), colData(DESeq_macrophage)$sample_id, colData(DESeq_treg)$sample_id )) # Subset the GSVA results to include only common samples gsva_res_macrophage_sub <- gsva_res_macrophage[, common_samples] gsva_res_treg_sub <- gsva_res_treg[, common_samples] # Step 2: Select Top 20 Pathways Enriched in Each Cell Type # Get sample IDs for dominant groups moc_samples <- colData(DESeq_macrophage)$sample_id[colData(DESeq_macrophage)$group.id == "MφOC"] moc_samples <- intersect(moc_samples, common_samples) tregtex_samples <- colData(DESeq_treg)$sample_id[colData(DESeq_treg)$group.id == "TregTex"] tregtex_samples <- intersect(tregtex_samples, common_samples) # Compute mean GSVA scores for each pathway mean_gsva_macrophage <- rowMeans(gsva_res_macrophage[, moc_samples, drop = FALSE], na.rm = TRUE) mean_gsva_treg <- rowMeans(gsva_res_treg[, tregtex_samples, drop = FALSE], na.rm = TRUE) # Select top 20 pathways for each cell type top20_macrophage <- names(sort(mean_gsva_macrophage, decreasing = TRUE))[1:30] top20_treg <- names(sort(mean_gsva_treg, decreasing = TRUE))[1:30] # Combine pathways gene_set_names <- unique(c(top20_macrophage, top20_treg)) # Subset GSVA results for selected pathways and common samples gsva_res_macrophage_sub <- gsva_res_macrophage[gene_set_names, common_samples] gsva_res_treg_sub <- gsva_res_treg[gene_set_names, common_samples] # Combine GSVA results into one matrix gsva_res_combined <- rbind( gsva_res_macrophage_sub[top20_macrophage, ], gsva_res_treg_sub[top20_treg, ] ) # Create pathway_cell_type vector pathway_cell_type <- c(rep("Macrophage", length(top20_macrophage)), rep("Treg", length(top20_treg))) # Step 3: Create Sample Annotations # For common samples sample_annotation <- data.frame( group.id = colData(DESeq_macrophage)$group.id[match(common_samples, colData(DESeq_macrophage)$sample_id)], stringsAsFactors = FALSE ) rownames(sample_annotation) <- common_samples # Define colors for group.id group_levels <- unique(sample_annotation$group.id) group_colors <- setNames( colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)), group_levels ) # Create HeatmapAnnotation ha <- HeatmapAnnotation( df = sample_annotation, col = list(group.id = group_colors), annotation_legend_param = list(group.id = list(title = "Group ID")) ) # Define color function based on data range min_score <- min(gsva_res_combined, na.rm = TRUE) max_score <- max(gsva_res_combined, na.rm = TRUE) color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red")) # Step 4: Plot the Heatmap heatmap <- Heatmap( gsva_res_combined, name = "GSVA Score", top_annotation = ha, show_column_names = TRUE, cluster_columns = TRUE, cluster_rows = TRUE, column_title = "Samples", row_title = "Pathways", col = color_fun, row_names_gp = gpar(fontsize = 8), row_split = pathway_cell_type, # Split rows by cell type cluster_column_slices = FALSE, # Cluster columns globally cluster_row_slices = FALSE ) # Draw the heatmap draw(heatmap) ``` ```{r} # Load necessary libraries library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Step 1: Load and Prepare Data # Load your GSVA results for both cell types gsva_res_mono <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds") gsva_res_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds") gsva_res_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5_immune_bone.rds") # Load DESeq datasets for both cell types DESeq_mono <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds") DESeq_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds") DESeq_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds") # Process sample names to extract sample IDs # For mono samples colnames(gsva_res_mono) <- tolower(colnames(gsva_res_mono)) colnames(gsva_res_mono) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_mono)) colnames(gsva_res_mono) <- sapply(strsplit(colnames(gsva_res_mono), "_"), function(x) tail(x, 1)) colnames(DESeq_mono) <- tolower(colnames(DESeq_mono)) colnames(DESeq_mono) <- gsub("[^a-z0-9_]", "", colnames(DESeq_mono)) colData(DESeq_mono)$sample_id <- sapply(strsplit(colnames(DESeq_mono), "_"), function(x) tail(x, 1)) # For macrophage samples colnames(gsva_res_macrophage) <- tolower(colnames(gsva_res_macrophage)) colnames(gsva_res_macrophage) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_macrophage)) colnames(gsva_res_macrophage) <- sapply(strsplit(colnames(gsva_res_macrophage), "_"), function(x) tail(x, 1)) colnames(DESeq_macrophage) <- tolower(colnames(DESeq_macrophage)) colnames(DESeq_macrophage) <- gsub("[^a-z0-9_]", "", colnames(DESeq_macrophage)) colData(DESeq_macrophage)$sample_id <- sapply(strsplit(colnames(DESeq_macrophage), "_"), function(x) tail(x, 1)) # For Treg samples colnames(gsva_res_treg) <- tolower(colnames(gsva_res_treg)) colnames(gsva_res_treg) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_treg)) colnames(gsva_res_treg) <- gsub(" ", "_", colnames(gsva_res_treg)) colnames(gsva_res_treg) <- sapply(strsplit(colnames(gsva_res_treg), "_"), function(x) tail(x, 1)) colnames(DESeq_treg) <- tolower(colnames(DESeq_treg)) colnames(DESeq_treg) <- gsub("[^a-z0-9_]", "", colnames(DESeq_treg)) colData(DESeq_treg)$sample_id <- sapply(strsplit(colnames(DESeq_treg), "_"), function(x) tail(x, 1)) # Find common samples among both datasets and DESeq sample IDs common_samples <- Reduce(intersect, list( colnames(gsva_res_mono), colnames(gsva_res_macrophage), colnames(gsva_res_treg), colData(DESeq_mono)$sample_id, colData(DESeq_macrophage)$sample_id, colData(DESeq_treg)$sample_id )) # Subset the GSVA results to include only common samples gsva_res_mono_sub <- gsva_res_mono[, common_samples] gsva_res_macrophage_sub <- gsva_res_macrophage[, common_samples] gsva_res_treg_sub <- gsva_res_treg[, common_samples] # Step 2: Select Top 20 Pathways Enriched in Each Cell Type # Get sample IDs for dominant groups mon_samples <- colData(DESeq_mono)$sample_id[colData(DESeq_mono)$group.id == "Mono"] mon_samples <- intersect(mon_samples, common_samples) moc_samples <- colData(DESeq_macrophage)$sample_id[colData(DESeq_macrophage)$group.id == "MφOC"] moc_samples <- intersect(moc_samples, common_samples) tregtex_samples <- colData(DESeq_treg)$sample_id[colData(DESeq_treg)$group.id == "TregTex"] tregtex_samples <- intersect(tregtex_samples, common_samples) # Compute mean GSVA scores for each pathway mean_gsva_mono <- rowMeans(gsva_res_mono[, mon_samples, drop = FALSE], na.rm = TRUE) mean_gsva_macrophage <- rowMeans(gsva_res_macrophage[, moc_samples, drop = FALSE], na.rm = TRUE) mean_gsva_treg <- rowMeans(gsva_res_treg[, tregtex_samples, drop = FALSE], na.rm = TRUE) # Select top 20 pathways for each cell type top20_mono <- names(sort(mean_gsva_mono, decreasing = TRUE))[1:30] top20_macrophage <- names(sort(mean_gsva_macrophage, decreasing = TRUE))[1:30] top20_treg <- names(sort(mean_gsva_treg, decreasing = TRUE))[1:30] # Combine pathways gene_set_names <- unique(c(top20_mono, top20_macrophage, top20_treg)) # Subset GSVA results for selected pathways and common samples gsva_res_mono_sub <- gsva_res_mono[gene_set_names, common_samples] gsva_res_macrophage_sub <- gsva_res_macrophage[gene_set_names, common_samples] gsva_res_treg_sub <- gsva_res_treg[gene_set_names, common_samples] # Combine GSVA results into one matrix gsva_res_combined <- rbind( gsva_res_mono_sub[top20_mono, ], gsva_res_macrophage_sub[top20_macrophage, ], gsva_res_treg_sub[top20_treg, ] ) # Create pathway_cell_type vector pathway_cell_type <- c(rep("Monocyte", length(top20_mono)), rep("Macrophage", length(top20_macrophage)), rep("Treg", length(top20_treg))) # Step 3: Create Sample Annotations # For common samples sample_annotation <- data.frame( group.id = colData(DESeq_macrophage)$group.id[match(common_samples, colData(DESeq_macrophage)$sample_id)], stringsAsFactors = FALSE ) rownames(sample_annotation) <- common_samples # Define colors for group.id group_levels <- unique(sample_annotation$group.id) group_colors <- setNames( colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)), group_levels ) # Create HeatmapAnnotation ha <- HeatmapAnnotation( df = sample_annotation, col = list(group.id = group_colors), annotation_legend_param = list(group.id = list(title = "Group ID")) ) # Define color function based on data range min_score <- min(gsva_res_combined, na.rm = TRUE) max_score <- max(gsva_res_combined, na.rm = TRUE) color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red")) # Step 4: Plot the Heatmap heatmap <- Heatmap( gsva_res_combined, name = "GSVA Score", top_annotation = ha, show_column_names = T, cluster_columns = TRUE, cluster_rows = TRUE, column_title = "Samples", row_title = "Pathways", col = color_fun, row_names_gp = gpar(fontsize = 8), row_split = pathway_cell_type, # Split rows by cell type cluster_column_slices = FALSE, # Cluster columns globally cluster_row_slices = FALSE, # Cluster rows within each cell type row_gap = unit(2, "mm") ) # Draw the heatmap draw(heatmap) ``` ```{r} # Load necessary libraries library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Step 1: Load and Prepare Data # Load your GSVA results for all cell types gsva_res_mono <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds") gsva_res_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds") gsva_res_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5_immune_bone.rds") # Load DESeq datasets for all cell types DESeq_mono <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds") DESeq_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds") DESeq_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds") # Process sample names to extract sample IDs process_sample_names <- function(gsva_res, DESeq_obj) { colnames(gsva_res) <- tolower(colnames(gsva_res)) colnames(gsva_res) <- gsub("[^a-z0-9_]", "", colnames(gsva_res)) colnames(gsva_res) <- sapply(strsplit(colnames(gsva_res), "_"), function(x) tail(x, 1)) colnames(DESeq_obj) <- tolower(colnames(DESeq_obj)) colnames(DESeq_obj) <- gsub("[^a-z0-9_]", "", colnames(DESeq_obj)) colData(DESeq_obj)$sample_id <- sapply(strsplit(colnames(DESeq_obj), "_"), function(x) tail(x, 1)) list(gsva_res = gsva_res, DESeq_obj = DESeq_obj) } # Process samples for each cell type processed_mono <- process_sample_names(gsva_res_mono, DESeq_mono) gsva_res_mono <- processed_mono$gsva_res DESeq_mono <- processed_mono$DESeq_obj processed_macrophage <- process_sample_names(gsva_res_macrophage, DESeq_macrophage) gsva_res_macrophage <- processed_macrophage$gsva_res DESeq_macrophage <- processed_macrophage$DESeq_obj processed_treg <- process_sample_names(gsva_res_treg, DESeq_treg) gsva_res_treg <- processed_treg$gsva_res DESeq_treg <- processed_treg$DESeq_obj # Find common samples among all datasets and DESeq sample IDs common_samples <- Reduce(intersect, list( colnames(gsva_res_mono), colnames(gsva_res_macrophage), colnames(gsva_res_treg), colData(DESeq_mono)$sample_id, colData(DESeq_macrophage)$sample_id, colData(DESeq_treg)$sample_id )) # Subset the GSVA results to include only common samples gsva_res_mono <- gsva_res_mono[, common_samples] gsva_res_macrophage <- gsva_res_macrophage[, common_samples] gsva_res_treg <- gsva_res_treg[, common_samples] # Step 2: Select Top 30 Pathways Enriched in Each Cell Type # Get sample IDs for dominant groups mon_samples <- colData(DESeq_mono)$sample_id[colData(DESeq_mono)$group.id == "Mono"] mon_samples <- intersect(mon_samples, common_samples) moc_samples <- colData(DESeq_macrophage)$sample_id[colData(DESeq_macrophage)$group.id == "MφOC"] moc_samples <- intersect(moc_samples, common_samples) tregtex_samples <- colData(DESeq_treg)$sample_id[colData(DESeq_treg)$group.id == "TregTex"] tregtex_samples <- intersect(tregtex_samples, common_samples) # Compute mean GSVA scores for each pathway mean_gsva_mono <- rowMeans(gsva_res_mono[, mon_samples, drop = FALSE], na.rm = TRUE) mean_gsva_macrophage <- rowMeans(gsva_res_macrophage[, moc_samples, drop = FALSE], na.rm = TRUE) mean_gsva_treg <- rowMeans(gsva_res_treg[, tregtex_samples, drop = FALSE], na.rm = TRUE) # Select top 30 pathways for each cell type top30_mono <- names(sort(mean_gsva_mono, decreasing = TRUE))[1:30] top30_macrophage <- names(sort(mean_gsva_macrophage, decreasing = TRUE))[1:30] top30_treg <- names(sort(mean_gsva_treg, decreasing = TRUE))[1:30] # Combine pathways gene_set_names <- unique(c(top30_mono, top30_macrophage, top30_treg)) # **Adjust code to prevent subsetting error** # Subset gene_set_names to those present in each gsva_res object gene_set_names_mono <- intersect(gene_set_names, rownames(gsva_res_mono)) gene_set_names_macrophage <- intersect(gene_set_names, rownames(gsva_res_macrophage)) gene_set_names_treg <- intersect(gene_set_names, rownames(gsva_res_treg)) # Subset GSVA results for selected pathways and common samples gsva_res_mono_sub <- gsva_res_mono[gene_set_names_mono, common_samples] gsva_res_macrophage_sub <- gsva_res_macrophage[gene_set_names_macrophage, common_samples] gsva_res_treg_sub <- gsva_res_treg[gene_set_names_treg, common_samples] # Combine GSVA results into one matrix gsva_res_combined <- rbind( gsva_res_mono_sub, gsva_res_macrophage_sub, gsva_res_treg_sub ) # Create pathway_cell_type vector pathway_cell_type <- c( rep("Monocyte", nrow(gsva_res_mono_sub)), rep("Macrophage", nrow(gsva_res_macrophage_sub)), rep("Treg", nrow(gsva_res_treg_sub)) ) # Step 3: Create Sample Annotations # For common samples sample_annotation <- data.frame( group.id = colData(DESeq_macrophage)$group.id[match(common_samples, colData(DESeq_macrophage)$sample_id)], stringsAsFactors = FALSE ) rownames(sample_annotation) <- common_samples # Define colors for group.id group_levels <- unique(sample_annotation$group.id) group_colors <- setNames( colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)), group_levels ) # Create HeatmapAnnotation ha <- HeatmapAnnotation( df = sample_annotation, col = list(group.id = group_colors), annotation_legend_param = list(group.id = list(title = "Group ID")) ) # Define color function based on data range min_score <- min(gsva_res_combined, na.rm = TRUE) max_score <- max(gsva_res_combined, na.rm = TRUE) color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red")) # Step 4: Plot the Heatmap heatmap <- Heatmap( gsva_res_combined, name = "GSVA Score", top_annotation = ha, show_column_names = TRUE, cluster_columns = TRUE, cluster_rows = TRUE, column_title = "Samples", row_title = "Pathways", col = color_fun, row_names_gp = gpar(fontsize = 8), row_split = pathway_cell_type, # Split rows by cell type cluster_column_slices = FALSE, # Cluster columns globally cluster_row_slices = FALSE, # Cluster rows within each cell type row_gap = unit(2, "mm") ) # Draw the heatmap draw(heatmap) ``` ```{r} # Load necessary libraries library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Step 1: Define Cell Types and Their Dominant Groups cell_types <- c("Monocyte", "Macrophage", "Treg", "OC", "Tex") dominant_groups <- c("Mono", "MφOC", "TregTex", "OC", "Tex") # Step 2: Specify File Paths for GSVA and DESeq Results gsva_files <- list( "Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds", "Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs//gsva_res.C5_immune_bone.rds", "Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds", "OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds", "Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds" ) deseq_files <- list( "Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds", "Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds", "Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds", "OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds", "Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds" ) # Step 3: Initialize Lists gsva_results <- list() deseq_data <- list() # Helper Function: Clean and Extract Sample IDs extract_sample_id <- function(colnames_vector) { cleaned_names <- tolower(colnames_vector) cleaned_names <- gsub("[^a-z0-9_]", "", cleaned_names) sample_ids <- sapply(strsplit(cleaned_names, "_"), function(x) tail(x, 1)) return(sample_ids) } # Step 4: Load Data for (i in seq_along(cell_types)) { cell_type <- cell_types[i] gsva_file <- gsva_files[[cell_type]] deseq_file <- deseq_files[[cell_type]] if (file.exists(gsva_file)) { gsva_res <- readRDS(gsva_file) colnames(gsva_res) <- extract_sample_id(colnames(gsva_res)) gsva_results[[cell_type]] <- gsva_res } if (file.exists(deseq_file)) { deseq_res <- readRDS(deseq_file) colnames(deseq_res) <- extract_sample_id(colnames(deseq_res)) colData(deseq_res)$sample_id <- extract_sample_id(colnames(deseq_res)) deseq_data[[cell_type]] <- deseq_res } } # Step 5: Identify Common Samples common_samples <- Reduce(intersect, c( lapply(gsva_results, colnames), lapply(deseq_data, function(x) colData(x)$sample_id) )) # Step 6: Exclude "ctrl" Samples sample_group_ids <- data.frame(sample_id = common_samples, group.id = NA, stringsAsFactors = FALSE) for (cell_type in cell_types) { if (cell_type %in% names(deseq_data)) { deseq_sub <- deseq_data[[cell_type]] group_id <- deseq_sub$group.id names(group_id) <- deseq_sub$sample_id samples_in_type <- intersect(names(group_id), sample_group_ids$sample_id) sample_group_ids$group.id[sample_group_ids$sample_id %in% samples_in_type] <- group_id[samples_in_type] } } non_ctrl_samples <- sample_group_ids$sample_id[sample_group_ids$group.id != "ctrl" & !is.na(sample_group_ids$group.id)] gsva_res_final <- lapply(gsva_results, function(x) x[, non_ctrl_samples, drop = FALSE]) # Step 7: Select Top Pathways gsva_top_pathways <- list() pathway_cell_type <- c() for (i in seq_along(cell_types)) { cell_type <- cell_types[i] if (cell_type %in% names(gsva_res_final)) { gsva_res_current <- gsva_res_final[[cell_type]] mean_gsva <- rowMeans(gsva_res_current, na.rm = TRUE) top_pathways <- names(sort(mean_gsva, decreasing = TRUE))[1:10] gsva_top_pathways[[cell_type]] <- gsva_res_current[top_pathways, , drop = FALSE] pathway_cell_type <- c(pathway_cell_type, rep(cell_type, length(top_pathways))) } } gsva_res_combined <- do.call(rbind, gsva_top_pathways) # Ensure Unique Row Names rownames(gsva_res_combined) <- make.unique(rownames(gsva_res_combined)) # Step 8: Create Annotations pathways_df <- data.frame( pathway = rownames(gsva_res_combined), cell_type = factor(pathway_cell_type, levels = cell_types), stringsAsFactors = FALSE ) sample_annotation <- data.frame( group.id = sample_group_ids$group.id[match(colnames(gsva_res_combined), sample_group_ids$sample_id)], stringsAsFactors = FALSE ) rownames(sample_annotation) <- colnames(gsva_res_combined) group_colors <- setNames( brewer.pal(min(8, length(unique(sample_annotation$group.id))), "Set1"), unique(sample_annotation$group.id) ) cell_type_colors <- setNames( brewer.pal(min(8, length(unique(pathways_df$cell_type))), "Set2"), unique(pathways_df$cell_type) ) # Step 9: Generate Heatmap ha <- HeatmapAnnotation( df = sample_annotation, col = list(group.id = group_colors), annotation_legend_param = list(group.id = list(title = "Group ID")) ) heatmap <- Heatmap( gsva_res_combined, name = "GSVA Score", top_annotation = ha, row_title = "Pathways", column_title = "Samples", col = colorRamp2(c(min(gsva_res_combined), 0, max(gsva_res_combined)), c("blue", "white", "red")), row_split = pathways_df$cell_type ) # Draw the Heatmap draw(heatmap) ``` ```{r} # Load necessary libraries library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Step 1: Define Cell Types and Their Dominant Groups cell_types <- c("Monocyte", "Macrophage", "Treg", "OC", "Tex") dominant_groups <- c("Mono", "MφOC", "TregTex", "MφOC", "TregTex") # Corrected mapping # Create a named vector for easy mapping group_mapping <- setNames(dominant_groups, cell_types) # Step 2: Specify File Paths for GSVA and DESeq Results gsva_files <- list( "Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds", "Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds", "Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds", "OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds", "Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds" ) deseq_files <- list( "Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds", "Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds", "Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds", "OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds", "Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds" ) # Step 3: Initialize Lists gsva_results <- list() deseq_data <- list() # Helper Function: Clean and Extract Sample IDs extract_sample_id <- function(colnames_vector) { # Modify this function based on the actual sample naming convention cleaned_names <- tolower(colnames_vector) cleaned_names <- gsub("[^a-z0-9_]", "", cleaned_names) sample_ids <- sapply(strsplit(cleaned_names, "_"), function(x) tail(x, 1)) return(sample_ids) } # Step 4: Load Data with Debugging for (i in seq_along(cell_types)) { cell_type <- cell_types[i] gsva_file <- gsva_files[[cell_type]] deseq_file <- deseq_files[[cell_type]] cat("Processing Cell Type:", cell_type, "\n") # Load GSVA data if (file.exists(gsva_file)) { gsva_res <- readRDS(gsva_file) original_colnames <- colnames(gsva_res) extracted_sample_ids <- extract_sample_id(original_colnames) # Debugging: Print sample names before and after extraction cat("GSVA Data - Original Sample Names:\n") print(head(original_colnames)) cat("GSVA Data - Extracted Sample IDs:\n") print(head(extracted_sample_ids)) # Assign extracted sample IDs colnames(gsva_res) <- extracted_sample_ids gsva_results[[cell_type]] <- gsva_res } else { warning(paste("GSVA file not found for cell type:", cell_type)) } # Load DESeq data if (file.exists(deseq_file)) { deseq_res <- readRDS(deseq_file) original_colnames <- colnames(deseq_res) extracted_sample_ids <- extract_sample_id(original_colnames) # Debugging: Print sample names before and after extraction cat("DESeq Data - Original Sample Names:\n") print(head(original_colnames)) cat("DESeq Data - Extracted Sample IDs:\n") print(head(extracted_sample_ids)) # Assign extracted sample IDs colnames(deseq_res) <- extracted_sample_ids colData(deseq_res)$sample_id <- extracted_sample_ids deseq_data[[cell_type]] <- deseq_res } else { warning(paste("DESeq file not found for cell type:", cell_type)) } cat("\n") # Add a newline for readability } # Step 5: Identify Common Samples common_samples <- Reduce(intersect, c( lapply(gsva_results, colnames), lapply(deseq_data, function(x) as.character(colData(x)$sample_id)) # Ensure sample_id is character )) cat("Common Samples:\n") print(common_samples) if (length(common_samples) == 0) { stop("No common samples found across all cell types.") } # Step 6: Exclude "ctrl" Samples and Assign Cell Types # Initialize a data frame with sample_id, group.id, and cell_type sample_group_ids <- data.frame(sample_id = common_samples, group.id = NA, cell_type = NA, stringsAsFactors = FALSE) for (cell_type in cell_types) { if (cell_type %in% names(deseq_data)) { deseq_sub <- deseq_data[[cell_type]] group_id <- as.character(deseq_sub$group.id) # Convert to character to prevent factor issues names(group_id) <- deseq_sub$sample_id # Debugging: Print group_id assignments cat("DESeq Data - Cell Type:", cell_type, "\n") cat("Group ID Assignments:\n") print(head(group_id)) samples_in_type <- intersect(names(group_id), sample_group_ids$sample_id) sample_group_ids$group.id[sample_group_ids$sample_id %in% samples_in_type] <- group_id[samples_in_type] sample_group_ids$cell_type[sample_group_ids$sample_id %in% samples_in_type] <- cell_type } } # Debugging: Inspect sample_group_ids cat("Sample Group Assignments:\n") print(head(sample_group_ids)) cat("Sample Group Distribution:\n") print(table(sample_group_ids$cell_type, sample_group_ids$group.id)) # Identify non-control samples non_ctrl_samples <- sample_group_ids$sample_id[sample_group_ids$group.id != "ctrl" & !is.na(sample_group_ids$group.id)] cat("Non-Control Samples:\n") print(non_ctrl_samples) if (length(non_ctrl_samples) == 0) { stop("No non-control samples found.") } # Subset GSVA results to include only non-control samples gsva_res_final <- lapply(gsva_results, function(x) x[, non_ctrl_samples, drop = FALSE]) # Step 7: Select Top Pathways Based on Group-Specific Rankings (Common Pathways) gsva_top_pathways <- list() pathway_cell_type <- c() # Identify common pathways across all cell types common_pathways <- Reduce(intersect, lapply(gsva_res_final, rownames)) cat("Number of Common Pathways:", length(common_pathways), "\n") if (length(common_pathways) == 0) { stop("No common pathways found across all cell types.") } # Initialize a matrix to store mean GSVA scores per cell type for common pathways mean_gsva_matrix <- matrix(NA, nrow = length(common_pathways), ncol = length(cell_types)) rownames(mean_gsva_matrix) <- common_pathways colnames(mean_gsva_matrix) <- cell_types # Calculate mean GSVA scores for each cell type within its dominant group for (cell_type in cell_types) { if (cell_type %in% names(gsva_res_final)) { gsva_res_current <- gsva_res_final[[cell_type]] # Get the dominant group for the current cell type dominant_group <- group_mapping[cell_type] # Identify samples in gsva_res_current that belong to the dominant group and are of the current cell_type samples_in_group <- sample_group_ids$sample_id[ sample_group_ids$group.id == dominant_group & sample_group_ids$cell_type == cell_type ] # Debugging: Print the number of samples found cat("Cell Type:", cell_type, "- Dominant Group:", dominant_group, "- Samples Found:", length(samples_in_group), "\n") # Ensure there are samples in the dominant group if (length(samples_in_group) == 0) { warning(paste("No samples found for group", dominant_group, "in cell type:", cell_type)) next } # Subset the GSVA results to only include samples from the dominant group gsva_subset <- gsva_res_current[, samples_in_group, drop = FALSE] # Check if common pathways are present in this cell type valid_pathways <- intersect(common_pathways, rownames(gsva_subset)) if (length(valid_pathways) == 0) { warning(paste("No valid common pathways found for cell type:", cell_type)) next } # Compute mean GSVA scores for each pathway within the dominant group mean_gsva <- rowMeans(gsva_subset[valid_pathways, , drop = FALSE], na.rm = TRUE) # Store the mean GSVA scores in the matrix mean_gsva_matrix[valid_pathways, cell_type] <- mean_gsva } } # Remove cell types with no mean GSVA scores valid_cell_types <- colnames(mean_gsva_matrix)[colSums(!is.na(mean_gsva_matrix)) > 0] mean_gsva_matrix <- mean_gsva_matrix[, valid_cell_types, drop = FALSE] cat("Valid Cell Types with Mean GSVA Scores:\n") print(valid_cell_types) # Debugging: Check if mean_gsva_matrix has valid data cat("Mean GSVA Matrix:\n") print(head(mean_gsva_matrix)) # Calculate the average mean GSVA score across cell types for each pathway average_mean_gsva <- rowMeans(mean_gsva_matrix, na.rm = TRUE) # Debugging: Print average_mean_gsva cat("Average Mean GSVA Scores:\n") print(head(average_mean_gsva)) # Select the top 30 pathways based on the average mean GSVA scores n_top <- min(50, length(average_mean_gsva)) top_pathways <- names(sort(average_mean_gsva, decreasing = TRUE))[1:n_top] # Debugging: Print top pathways cat("Selected Top Pathways:\n") print(top_pathways) # Subset the GSVA results to include only the top pathways gsva_top_pathways <- lapply(gsva_res_final, function(x) { valid_top_pathways <- intersect(top_pathways, rownames(x)) if (length(valid_top_pathways) == 0) { return(NULL) } else { return(x[valid_top_pathways, , drop = FALSE]) } }) # Remove cell types with no valid pathways gsva_top_pathways <- gsva_top_pathways[!sapply(gsva_top_pathways, is.null)] # Check if any pathways are left after filtering if (length(gsva_top_pathways) == 0) { stop("No valid pathways left after filtering.") } # Combine all top pathways into a single matrix gsva_res_combined <- do.call(rbind, gsva_top_pathways) # Ensure Unique Row Names rownames(gsva_res_combined) <- make.unique(rownames(gsva_res_combined)) # Step 8: Create Annotations pathways_df <- data.frame( pathway = rownames(gsva_res_combined), cell_type = factor(rep(valid_cell_types, each = length(top_pathways)), levels = valid_cell_types), stringsAsFactors = FALSE ) sample_annotation <- data.frame( group.id = sample_group_ids$group.id[match(colnames(gsva_res_combined), sample_group_ids$sample_id)], stringsAsFactors = FALSE ) rownames(sample_annotation) <- colnames(gsva_res_combined) # Debugging: Inspect annotations cat("Sample Annotations:\n") print(head(sample_annotation)) cat("Pathway Annotations:\n") print(head(pathways_df)) # Define Colors for Groups group_ids <- unique(sample_annotation$group.id) n_groups <- length(group_ids) # Ensure at least 3 colors for brewer.pal or handle fewer if (n_groups >= 3) { group_palette <- brewer.pal(min(8, n_groups), "Set1") } else if (n_groups == 2) { group_palette <- c("red", "blue") } else if (n_groups == 1) { group_palette <- "red" } group_colors <- setNames( group_palette, group_ids ) # Define Colors for Cell Types cell_types_with_unique <- unique(pathways_df$cell_type) n_cell_types <- length(cell_types_with_unique) # Handle color palette based on the number of cell types if (n_cell_types >= 3) { cell_type_palette <- brewer.pal(min(8, n_cell_types), "Set2") } else if (n_cell_types == 2) { cell_type_palette <- c("lightblue", "lightgreen") } else if (n_cell_types == 1) { cell_type_palette <- "lightblue" } cell_type_colors <- setNames( cell_type_palette, cell_types_with_unique ) # Add row annotation for cell types row_annotation <- rowAnnotation( Cell_Type = pathways_df$cell_type, col = list(Cell_Type = cell_type_colors), annotation_legend_param = list(Cell_Type = list(title = "Cell Type")) ) # Step 9: Generate Heatmap ha <- HeatmapAnnotation( df = sample_annotation, col = list(group.id = group_colors), annotation_legend_param = list(group.id = list(title = "Group ID")) ) heatmap <- Heatmap( gsva_res_combined, name = "GSVA Score", top_annotation = ha, right_annotation = row_annotation, # Add row annotation for cell types row_title = "Pathways", column_title = "Samples", col = colorRamp2(c(min(gsva_res_combined, na.rm = TRUE), 0, max(gsva_res_combined, na.rm = TRUE)), c("blue", "white", "red")), row_split = pathways_df$cell_type, show_row_names = TRUE, show_column_names = FALSE ) # Draw the Heatmap draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right") pdf("./outs/GSVA_by_celltype_summarized/gsva_heatmap_all_pathways.50.pdf", width = 14, height = 30) # Width and height in inches # Draw the heatmap with legends on the right draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right") # Close the graphics device dev.off() #write.csv(gsva_res_combined, './outs/GSVA_by_celltype_summarized/gsva_heatmap_all_pathways.50.csv') ``` ```{r} # Load necessary libraries library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Step 1: Define Cell Types and Their Dominant Groups cell_types <- c("Monocyte", "Macrophage", "Treg", "OC", "Tex") dominant_groups <- c("Mono", "MφOC", "TregTex", "OC", "Tex") # Step 2: Specify File Paths for GSVA and DESeq Results gsva_files <- list( "Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds", "Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds", "Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds", "OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds", "Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds" ) deseq_files <- list( "Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds", "Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds", "Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds", "OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds", "Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds" ) # Step 3: Initialize Lists gsva_results <- list() deseq_data <- list() # Helper Function: Clean and Extract Sample IDs extract_sample_id <- function(colnames_vector) { cleaned_names <- tolower(colnames_vector) cleaned_names <- gsub("[^a-z0-9_]", "", cleaned_names) sample_ids <- sapply(strsplit(cleaned_names, "_"), function(x) tail(x, 1)) return(sample_ids) } # Step 4: Load Data for (i in seq_along(cell_types)) { cell_type <- cell_types[i] gsva_file <- gsva_files[[cell_type]] deseq_file <- deseq_files[[cell_type]] if (file.exists(gsva_file)) { gsva_res <- readRDS(gsva_file) colnames(gsva_res) <- extract_sample_id(colnames(gsva_res)) gsva_results[[cell_type]] <- gsva_res } if (file.exists(deseq_file)) { deseq_res <- readRDS(deseq_file) colnames(deseq_res) <- extract_sample_id(colnames(deseq_res)) colData(deseq_res)$sample_id <- extract_sample_id(colnames(deseq_res)) deseq_data[[cell_type]] <- deseq_res } } # Step 5: Identify Common Samples common_samples <- Reduce(intersect, c( lapply(gsva_results, colnames), lapply(deseq_data, function(x) colData(x)$sample_id) )) # Step 6: Exclude "ctrl" Samples sample_group_ids <- data.frame(sample_id = common_samples, group.id = NA, stringsAsFactors = FALSE) for (cell_type in cell_types) { if (cell_type %in% names(deseq_data)) { deseq_sub <- deseq_data[[cell_type]] group_id <- deseq_sub$group.id names(group_id) <- deseq_sub$sample_id samples_in_type <- intersect(names(group_id), sample_group_ids$sample_id) sample_group_ids$group.id[sample_group_ids$sample_id %in% samples_in_type] <- group_id[samples_in_type] } } non_ctrl_samples <- sample_group_ids$sample_id[sample_group_ids$group.id != "ctrl" & !is.na(sample_group_ids$group.id)] gsva_res_final <- lapply(gsva_results, function(x) x[, non_ctrl_samples, drop = FALSE]) # Step 7: Select Unique Top Pathways gsva_top_pathways <- list() pathway_cell_type <- c() # Identify all pathways per cell type pathways_per_cell_type <- lapply(gsva_res_final, rownames) # Identify unique pathways for each cell type unique_pathways <- list() for (cell_type in cell_types) { other_cell_types <- setdiff(cell_types, cell_type) other_pathways <- unique(unlist(pathways_per_cell_type[other_cell_types])) unique_pathways[[cell_type]] <- setdiff(pathways_per_cell_type[[cell_type]], other_pathways) } # Select top pathways from the unique set for (cell_type in cell_types) { if (cell_type %in% names(gsva_res_final)) { unique_ps <- unique_pathways[[cell_type]] if (length(unique_ps) == 0) { warning(paste("No unique pathways found for cell type:", cell_type)) next } gsva_res_current <- gsva_res_final[[cell_type]] # Filter to unique pathways gsva_unique <- gsva_res_current[unique_ps, , drop = FALSE] # Compute mean GSVA score mean_gsva <- rowMeans(gsva_unique, na.rm = TRUE) # Determine the number of top pathways to select (up to 10) n_top <- min(50, length(mean_gsva)) top_pathways <- names(sort(mean_gsva, decreasing = TRUE))[1:n_top] gsva_top_pathways[[cell_type]] <- gsva_unique[top_pathways, , drop = FALSE] pathway_cell_type <- c(pathway_cell_type, rep(cell_type, length(top_pathways))) } } # Check if any unique pathways were selected if (length(gsva_top_pathways) == 0) { stop("No unique pathways were selected for any cell type.") } # Combine all unique top pathways into a single matrix gsva_res_combined <- do.call(rbind, gsva_top_pathways) # Ensure Unique Row Names rownames(gsva_res_combined) <- make.unique(rownames(gsva_res_combined)) # Step 8: Create Annotations pathways_df <- data.frame( pathway = rownames(gsva_res_combined), cell_type = factor(pathway_cell_type, levels = cell_types), stringsAsFactors = FALSE ) sample_annotation <- data.frame( group.id = sample_group_ids$group.id[match(colnames(gsva_res_combined), sample_group_ids$sample_id)], stringsAsFactors = FALSE ) rownames(sample_annotation) <- colnames(gsva_res_combined) # Define Colors for Groups group_ids <- unique(sample_annotation$group.id) n_groups <- length(group_ids) # Ensure at least 3 colors for brewer.pal or handle fewer if (n_groups >= 3) { group_palette <- brewer.pal(min(8, n_groups), "Set1") } else if (n_groups == 2) { group_palette <- c("red", "blue") } else if (n_groups == 1) { group_palette <- "red" } group_colors <- setNames( group_palette, group_ids ) # Define Colors for Cell Types cell_types_with_unique <- unique(pathways_df$cell_type) n_cell_types <- length(cell_types_with_unique) # Handle color palette based on the number of cell types if (n_cell_types >= 3) { cell_type_palette <- brewer.pal(min(8, n_cell_types), "Set2") } else if (n_cell_types == 2) { cell_type_palette <- c("lightblue", "lightgreen") } else if (n_cell_types == 1) { cell_type_palette <- "lightblue" } cell_type_colors <- setNames( cell_type_palette, cell_types_with_unique ) # Add row annotation for cell types row_annotation <- rowAnnotation( Cell_Type = pathways_df$cell_type, col = list(Cell_Type = cell_type_colors), annotation_legend_param = list(Cell_Type = list(title = "Cell Type")) ) # Step 9: Generate Heatmap ha <- HeatmapAnnotation( df = sample_annotation, col = list(group.id = group_colors), annotation_legend_param = list(group.id = list(title = "Group ID")) ) heatmap <- Heatmap( gsva_res_combined, name = "GSVA Score", top_annotation = ha, right_annotation = row_annotation, # Add row annotation for cell types row_title = "Pathways", column_title = "Samples", col = colorRamp2(c(min(gsva_res_combined, na.rm = TRUE), 0, max(gsva_res_combined, na.rm = TRUE)), c("blue", "white", "red")), row_split = pathways_df$cell_type, show_row_names = TRUE, show_column_names = FALSE ) # Draw the Heatmap draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right") ``` ```{r} # Load necessary libraries library(ComplexHeatmap) library(circlize) library(RColorBrewer) library(dplyr) # Function to normalize pathway names normalize_pathway_names <- function(pathway_names) { pathway_names <- toupper(pathway_names) pathway_names <- trimws(pathway_names) pathway_names <- gsub("[^A-Z0-9_]", "_", pathway_names) return(pathway_names) } # Step 1: Define Cell Types and Dominant Groups cell_types <- c("Mono", "Macrophage", "OC", "Treg", "Tex") dominant_groups <- c("Mono", "MφOC", "MφOC", "TregTex", "TregTex") # 5 elements corresponding to cell_types # Step 2: Specify File Paths for GSVA and DESeq Datasets gsva_files <- list( "Mono" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds", "Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds", "OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds", "Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5_immune_bone.rds", "Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds" ) deseq_files <- list( "Mono" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds", "Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds", "OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds", "Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds", "Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds" ) # Step 3: Load GSVA and DESeq Data for Each Cell Type gsva_results <- list() deseq_data <- list() for (i in seq_along(cell_types)) { cell_type <- cell_types[i] dominant_group <- dominant_groups[i] cat("Loading data for cell type:", cell_type, "\n") # Load GSVA results gsva_file <- gsva_files[[cell_type]] if (!file.exists(gsva_file)) { warning(paste("GSVA file not found for cell type:", cell_type, "at", gsva_file)) next # Skip to the next cell type } gsva_res <- readRDS(gsva_file) # Load DESeq data deseq_file <- deseq_files[[cell_type]] if (!file.exists(deseq_file)) { warning(paste("DESeq file not found for cell type:", cell_type, "at", deseq_file)) next # Skip to the next cell type } deseq_res <- readRDS(deseq_file) # Process sample names for GSVA results colnames(gsva_res) <- tolower(colnames(gsva_res)) colnames(gsva_res) <- gsub("[^a-z0-9_]", "", colnames(gsva_res)) colnames(gsva_res) <- sapply(strsplit(colnames(gsva_res), "_"), function(x) tail(x, 1)) # Process sample names for DESeq data colnames(deseq_res) <- tolower(colnames(deseq_res)) colnames(deseq_res) <- gsub("[^a-z0-9_]", "", colnames(deseq_res)) colData(deseq_res)$sample_id <- sapply(strsplit(colnames(deseq_res), "_"), function(x) tail(x, 1)) # Store in lists gsva_results[[cell_type]] <- gsva_res deseq_data[[cell_type]] <- deseq_res } # Step 4: Find Common Samples Across All Cell Types cat("\nIdentifying common samples across all cell types...\n") gsva_sample_lists <- lapply(gsva_results, colnames) common_samples <- Reduce(intersect, gsva_sample_lists) # Ensure samples are present in DESeq data for each cell type for (cell_type in cell_types) { if (!(cell_type %in% names(deseq_data))) { warning(paste("DESeq data missing for cell type:", cell_type)) next } deseq_sample_ids <- colData(deseq_data[[cell_type]])$sample_id common_samples <- intersect(common_samples, deseq_sample_ids) } # Check if common_samples is non-empty if (length(common_samples) == 0) { stop("No common samples found among all cell types.") } else { cat("Number of common samples:", length(common_samples), "\n") } # Subset the GSVA results to include only common samples gsva_res_sub <- lapply(gsva_results, function(x) x[, common_samples, drop = FALSE]) # Step 5: Select Top 30 Pathways Enriched in Each Cell Type cat("\nSelecting top 30 pathways for each cell type based on dominant groups...\n") gsva_top_pathways <- list() pathway_cell_type <- c() for (i in seq_along(cell_types)) { cell_type <- cell_types[i] dominant_group <- dominant_groups[i] cat("\nProcessing cell type:", cell_type, "\n") # Check if data exists for the cell type if (!(cell_type %in% names(gsva_res_sub)) | !(cell_type %in% names(deseq_data))) { warning(paste("Data missing for cell type:", cell_type)) # Add a dummy pathway to create an empty section dummy_pathway <- paste0("No_Pathways_", cell_type) gsva_top_pathways[[cell_type]] <- matrix(NA, nrow = 1, ncol = length(common_samples)) rownames(gsva_top_pathways[[cell_type]]) <- dummy_pathway pathway_cell_type <- c(pathway_cell_type, cell_type) next # Move to the next cell type } gsva_res_current <- gsva_res_sub[[cell_type]] deseq_current <- deseq_data[[cell_type]] # Get sample IDs for the dominant group group_samples <- deseq_current$sample_id[deseq_current$group.id == dominant_group] group_samples <- intersect(group_samples, common_samples) cat("Number of samples in dominant group (", dominant_group, "): ", length(group_samples), "\n", sep = "") # Check if group_samples is empty if (length(group_samples) == 0) { warning(paste("No samples found for dominant group", dominant_group, "in cell type", cell_type)) # Add a dummy pathway to create an empty section dummy_pathway <- paste0("No_Pathways_", cell_type) gsva_top_pathways[[cell_type]] <- matrix(NA, nrow = 1, ncol = length(common_samples)) rownames(gsva_top_pathways[[cell_type]]) <- dummy_pathway pathway_cell_type <- c(pathway_cell_type, cell_type) next # Skip to the next cell type } # Compute mean GSVA scores for each pathway mean_gsva <- rowMeans(gsva_res_current[, group_samples, drop = FALSE], na.rm = TRUE) # Remove pathways with NA mean scores mean_gsva <- mean_gsva[!is.na(mean_gsva)] if (length(mean_gsva) == 0) { warning(paste("No valid GSVA scores for cell type", cell_type)) # Add a dummy pathway dummy_pathway <- paste0("No_Pathways_", cell_type) gsva_top_pathways[[cell_type]] <- matrix(NA, nrow = 1, ncol = length(common_samples)) rownames(gsva_top_pathways[[cell_type]]) <- dummy_pathway pathway_cell_type <- c(pathway_cell_type, cell_type) next } # Select top 30 pathways num_pathways <- min(10, length(mean_gsva)) top_pathways <- names(sort(mean_gsva, decreasing = TRUE))[1:num_pathways] cat("Selected top", length(top_pathways), "pathways for cell type", cell_type, "\n") # Check if pathways exist in GSVA results missing_pathways <- setdiff(top_pathways, rownames(gsva_res_current)) if (length(missing_pathways) > 0) { warning(paste("Some top pathways are missing for cell type", cell_type, ":", paste(missing_pathways, collapse = ", "))) # Remove missing pathways from top_pathways top_pathways <- intersect(top_pathways, rownames(gsva_res_current)) } # If no pathways remain, add dummy if (length(top_pathways) == 0) { warning(paste("No valid pathways for cell type", cell_type)) dummy_pathway <- paste0("No_Pathways_", cell_type) gsva_top_pathways[[cell_type]] <- matrix(NA, nrow = 1, ncol = length(common_samples)) rownames(gsva_top_pathways[[cell_type]]) <- dummy_pathway pathway_cell_type <- c(pathway_cell_type, cell_type) next } # Subset GSVA results to top pathways gsva_top_subset <- gsva_res_current[top_pathways, ] # Store in list gsva_top_pathways[[cell_type]] <- gsva_top_subset # Record pathway_cell_type pathway_cell_type <- c(pathway_cell_type, rep(cell_type, length(top_pathways))) } # Step 6: Combine Pathways into One Matrix cat("\nCombining top pathways from all cell types into a single matrix...\n") gsva_res_combined <- do.call(rbind, gsva_top_pathways) # Create a data frame for pathway annotations pathways_df <- data.frame( pathway = rownames(gsva_res_combined), cell_type = pathway_cell_type, stringsAsFactors = FALSE ) # Define the order of cell types cell_type_order <- c("Mono", "Macrophage", "OC", "Treg", "Tex") # Make cell_type a factor with specified order pathways_df$cell_type <- factor(pathways_df$cell_type, levels = cell_type_order) # Arrange pathways by cell type pathways_df <- pathways_df %>% arrange(cell_type) # Reorder gsva_res_combined accordingly gsva_res_combined <- gsva_res_combined[pathways_df$pathway, ] # Update pathway_cell_type pathway_cell_type <- pathways_df$cell_type # Step 7: Create Sample Annotations cat("\nCreating sample annotations...\n") # Create sample annotation data frame using group IDs from "Macrophage" if (!("Macrophage" %in% names(deseq_data))) { stop("DESeq data for 'Macrophage' is missing.") } sample_annotation <- data.frame( group.id = deseq_data[["Macrophage"]]$group.id[match(common_samples, deseq_data[["Macrophage"]]$sample_id)], stringsAsFactors = FALSE ) rownames(sample_annotation) <- common_samples # Replace "Mo" with "Mono" in group.id sample_annotation$group.id[sample_annotation$group.id == "Mo"] <- "Mono" # Assign "Unknown" to samples not in desired groups without excluding them desired_groups <- c("Mono", "MφOC", "TregTex") sample_annotation$group.id <- ifelse(sample_annotation$group.id %in% desired_groups, sample_annotation$group.id, "Unknown") # Convert group.id to factor with specified levels, including "Unknown" desired_groups_extended <- c(desired_groups, "Unknown") sample_annotation$group.id <- factor(sample_annotation$group.id, levels = desired_groups_extended) # Define colors for group.id based on levels group_levels <- levels(sample_annotation$group.id) num_groups <- length(group_levels) # Define colors using RColorBrewer's Set1 palette group_colors <- setNames( brewer.pal(n = num_groups, name = "Set1"), group_levels ) # Assign a specific color to "Unknown" if desired (e.g., grey) if ("Unknown" %in% group_levels) { group_colors["Unknown"] <- "grey" } # Replace 'MφOC' with 'MphiOC' to handle special character issues (optional) sample_annotation$group.id <- as.character(sample_annotation$group.id) # Convert to character sample_annotation$group.id <- gsub("MφOC", "MphiOC", sample_annotation$group.id) sample_annotation$group.id <- factor(sample_annotation$group.id, levels = c("Mono", "MphiOC", "TregTex", "Unknown")) # Update group_colors accordingly group_colors <- setNames( brewer.pal(n = length(levels(sample_annotation$group.id)), name = "Set1"), levels(sample_annotation$group.id) ) # Assign a distinct color to "Unknown" if desired if ("Unknown" %in% levels(sample_annotation$group.id)) { group_colors["Unknown"] <- "grey" } # Reorder sample_annotation to match gsva_res_combined column order if (!all(colnames(gsva_res_combined) == rownames(sample_annotation))) { cat("Reordering sample_annotation to match gsva_res_combined column order...\n") sample_annotation <- sample_annotation[colnames(gsva_res_combined), , drop = FALSE] } # Verify alignment alignment <- all(colnames(gsva_res_combined) == rownames(sample_annotation)) cat("Do sample_annotation row names match gsva_res_combined column names? ", alignment, "\n") if (!alignment) { stop("Mismatch between gsva_res_combined columns and sample_annotation row names.") } else { cat("Sample annotations successfully aligned with heatmap columns.\n") } # Print group.id levels and colors for verification cat("Levels of group.id:\n") print(levels(sample_annotation$group.id)) cat("Group Colors Mapping:\n") print(group_colors) # Check for NA values in group.id after handling na_count <- sum(is.na(sample_annotation$group.id)) cat("Number of NA values in group.id after handling:", na_count, "\n") # Ensure no NAs remain if (na_count > 0) { warning("There are NA values in group.id. Assigning them to 'Unknown'.") sample_annotation$group.id[is.na(sample_annotation$group.id)] <- "Unknown" # Re-convert to factor including 'Unknown' sample_annotation$group.id <- factor(sample_annotation$group.id, levels = c("Mono", "MphiOC", "TregTex", "Unknown")) } # Create HeatmapAnnotation for sample annotations ha <- HeatmapAnnotation( df = sample_annotation, col = list( group.id = group_colors ), annotation_legend_param = list( group.id = list(title = "Group ID") ) ) # Step 8: Plot the Heatmap cat("\nGenerating the heatmap...\n") # Define color function based on data range and handle NA values color_fun <- colorRamp2(c(min(gsva_res_combined, na.rm = TRUE), 0, max(gsva_res_combined, na.rm = TRUE)), c("blue", "white", "red")) # Define colors for cell types in row annotations cell_type_colors <- setNames( brewer.pal(n = length(cell_type_order), name = "Dark2"), cell_type_order ) # Create row annotation for cell types row_annotation <- rowAnnotation( Cell_Type = pathway_cell_type, col = list(Cell_Type = cell_type_colors), show_annotation_name = TRUE, annotation_legend_param = list(Cell_Type = list(title = "Cell Type")) ) # Create the heatmap with row_split and row_annotation heatmap <- Heatmap( gsva_res_combined, name = "GSVA Score", top_annotation = ha, right_annotation = row_annotation, # Add row annotations show_column_names = FALSE, cluster_columns = TRUE, cluster_rows = FALSE, # Rows are ordered manually column_title = "Samples", row_title = "Pathways", col = color_fun, row_names_gp = gpar(fontsize = 8), row_split = pathway_cell_type, # Split rows by cell type row_order = order(pathway_cell_type, decreasing = FALSE), row_gap = unit(2, "mm"), row_title_rot = 0, row_title_gp = gpar(fontsize = 10, fontface = "bold"), na_col = "gray" # Color for NA values (dummy pathways) ) # Draw the heatmap with legends on the right draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right") #write.csv(gsva_res_combined, './outs/GSVA_by_celltype_summarized/gsva_res_combined.csv') ``` ```{r} # Load necessary libraries library(ComplexHeatmap) library(circlize) library(RColorBrewer) # Step 1: Load and Prepare Data # Define cell types and their corresponding dominant groups cell_types <- c("Mono", "Macrophage", "OC", "Treg", "Tex") dominant_groups <- c("Mono", "MφOC", "MφOC", "TregTex", "TregTex") # 5 elements corresponding to cell_types # Specify file paths for each cell type's GSVA and DESeq results gsva_files <- list( "Mono" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds", "Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds", "OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds", "Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5_immune_bone.rds", "Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds" ) deseq_files <- list( "Mono" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds", "Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds", "OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds", "Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds", "Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds" ) # Initialize lists to store GSVA results and DESeq data gsva_results <- list() deseq_data <- list() # Load GSVA results and DESeq data for each cell type for (i in seq_along(cell_types)) { cell_type <- cell_types[i] dominant_group <- dominant_groups[i] cat("Loading data for cell type:", cell_type, "\n") # Load GSVA results gsva_file <- gsva_files[[cell_type]] if (!file.exists(gsva_file)) { warning(paste("GSVA file not found for cell type:", cell_type, "at", gsva_file)) next # Skip to the next cell type } gsva_res <- readRDS(gsva_file) # Load DESeq data deseq_file <- deseq_files[[cell_type]] if (!file.exists(deseq_file)) { warning(paste("DESeq file not found for cell type:", cell_type, "at", deseq_file)) next # Skip to the next cell type } deseq_res <- readRDS(deseq_file) # Process sample names to extract sample IDs for GSVA results colnames(gsva_res) <- tolower(colnames(gsva_res)) colnames(gsva_res) <- gsub("[^a-z0-9_]", "", colnames(gsva_res)) colnames(gsva_res) <- sapply(strsplit(colnames(gsva_res), "_"), function(x) tail(x, 1)) # Process sample names to extract sample IDs for DESeq data colnames(deseq_res) <- tolower(colnames(deseq_res)) colnames(deseq_res) <- gsub("[^a-z0-9_]", "", colnames(deseq_res)) colData(deseq_res)$sample_id <- sapply(strsplit(colnames(deseq_res), "_"), function(x) tail(x, 1)) # Store in lists gsva_results[[cell_type]] <- gsva_res deseq_data[[cell_type]] <- deseq_res } # Step 2: Find Common Samples # Identify common samples across all GSVA datasets gsva_sample_lists <- lapply(gsva_results, colnames) common_samples <- Reduce(intersect, gsva_sample_lists) # Ensure samples are present in DESeq data for each cell type for (cell_type in cell_types) { if (!(cell_type %in% names(deseq_data))) { warning(paste("DESeq data missing for cell type:", cell_type)) next } deseq_sample_ids <- colData(deseq_data[[cell_type]])$sample_id common_samples <- intersect(common_samples, deseq_sample_ids) } # Check if common_samples is non-empty if (length(common_samples) == 0) { stop("No common samples found among all cell types.") } else { cat("Number of common samples:", length(common_samples), "\n") } # Subset the GSVA results to include only common samples gsva_res_sub <- lapply(gsva_results, function(x) x[, common_samples, drop = FALSE]) # Step 3: Select Top Pathways Enriched in Each Cell Type # Initialize lists to store top pathways and pathway annotations gsva_top_pathways <- list() pathway_cell_type <- c() for (i in seq_along(cell_types)) { cell_type <- cell_types[i] dominant_group <- dominant_groups[i] cat("\nProcessing cell type:", cell_type, "\n") # Check if data exists for the cell type if (!(cell_type %in% names(gsva_res_sub)) | !(cell_type %in% names(deseq_data))) { warning(paste("Data missing for cell type:", cell_type)) next # Skip to the next cell type } gsva_res_current <- gsva_res_sub[[cell_type]] deseq_current <- deseq_data[[cell_type]] # Get sample IDs for the dominant group group_samples <- deseq_current$sample_id[deseq_current$group.id == dominant_group] group_samples <- intersect(group_samples, common_samples) # Check if group_samples is empty if (length(group_samples) == 0) { warning(paste("No samples found for dominant group", dominant_group, "in cell type", cell_type)) next # Skip to the next cell type } # Compute mean GSVA scores for each pathway mean_gsva <- rowMeans(gsva_res_current[, group_samples, drop = FALSE], na.rm = TRUE) # Handle cases where mean_gsva is NA mean_gsva <- mean_gsva[!is.na(mean_gsva)] if (length(mean_gsva) == 0) { warning(paste("No valid GSVA scores for cell type", cell_type)) next } # Select top pathways (e.g., top 100 or all if fewer) num_pathways <- min(50, length(mean_gsva)) # Adjust the number as needed top_pathways <- names(sort(mean_gsva, decreasing = TRUE))[1:num_pathways] # Check if pathways exist in GSVA results missing_pathways <- setdiff(top_pathways, rownames(gsva_res_current)) if (length(missing_pathways) > 0) { warning(paste("Some top pathways are missing for cell type", cell_type, ":", paste(missing_pathways, collapse = ", "))) # Remove missing pathways from top_pathways top_pathways <- intersect(top_pathways, rownames(gsva_res_current)) } # If no pathways remain, skip if (length(top_pathways) == 0) { warning(paste("No valid pathways for cell type", cell_type)) next } # Subset GSVA results to top pathways gsva_top_subset <- gsva_res_current[top_pathways, ] # Store in list gsva_top_pathways[[cell_type]] <- gsva_top_subset # Record pathway_cell_type pathway_cell_type <- c(pathway_cell_type, rep(cell_type, length(top_pathways))) } # Step 4: Combine Pathways into One Matrix # Combine all top pathways into one matrix gsva_res_combined <- do.call(rbind, gsva_top_pathways) # Create a data frame for pathway annotations pathways_df <- data.frame( pathway = rownames(gsva_res_combined), cell_type = pathway_cell_type, stringsAsFactors = FALSE ) # Define the order of cell types cell_type_order <- c("Mono", "Macrophage", "OC", "Treg", "Tex") # Make cell_type a factor with specified order pathways_df$cell_type <- factor(pathways_df$cell_type, levels = cell_type_order) # Arrange pathways by cell type pathways_df <- pathways_df[order(pathways_df$cell_type), ] # Reorder gsva_res_combined accordingly gsva_res_combined <- gsva_res_combined[pathways_df$pathway, ] # Update pathway_cell_type pathway_cell_type <- pathways_df$cell_type # Step 5: Create Sample Annotations # Create sample annotation data frame using group IDs from a reference cell type (e.g., "Macrophage") # Assuming group.id is consistent across cell types; adjust if necessary reference_cell_type <- "Macrophage" if (!(reference_cell_type %in% names(deseq_data))) { stop(paste("DESeq data for", reference_cell_type, "is missing.")) } sample_annotation <- data.frame( group.id = deseq_data[[reference_cell_type]]$group.id[match(common_samples, deseq_data[[reference_cell_type]]$sample_id)], stringsAsFactors = FALSE ) rownames(sample_annotation) <- common_samples # Replace any unexpected group IDs with "Unknown" (optional) # Modify this section based on your actual group IDs and desired handling sample_annotation$group.id <- ifelse(sample_annotation$group.id %in% c("Mono", "MφOC", "TregTex"), sample_annotation$group.id, "Unknown") # Define colors for group.id group_levels <- unique(sample_annotation$group.id) group_colors <- setNames( brewer.pal(min(length(group_levels), 8), "Set1"), # Use Set1 palette with up to 8 colors group_levels ) # Create HeatmapAnnotation ha <- HeatmapAnnotation( df = sample_annotation, col = list( group.id = group_colors ), annotation_legend_param = list( group.id = list(title = "Group ID") ) ) # Step 6: Plot the Heatmap # Define color function based on data range and handle NA values color_fun <- colorRamp2(c(min(gsva_res_combined, na.rm = TRUE), 0, max(gsva_res_combined, na.rm = TRUE)), c("blue", "white", "red")) # Create the heatmap with adjusted figure size heatmap <- Heatmap( gsva_res_combined, name = "GSVA Score", top_annotation = ha, show_column_names = FALSE, cluster_columns = TRUE, cluster_rows = FALSE, # Rows are ordered manually by cell type column_title = "Samples", row_title = "Pathways", col = color_fun, row_names_gp = gpar(fontsize = 8), row_split = pathway_cell_type, # Split rows by cell type row_order = order(pathway_cell_type, decreasing = FALSE), row_title_rot = 0, row_title_gp = gpar(fontsize = 3, fontface = "bold"), na_col = "gray" # Color for NA values (if any) ) # Save the heatmap to a PDF with specified size #pdf("./outs/GSVA_by_celltype_summarized/gsva_res_combined_imm_bone_top_50_for_10.pdf", width = 10, height = 8) # Width and height in inches # Draw the heatmap with legends on the right draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right") # Close the graphics device #dev.off() # Alternatively, to display in the R plotting window with adjusted size: # Adjust the plotting window size manually or programmatically # windows(width = 14, height = 30) # For Windows OS # quartz(width = 14, height = 30) # For macOS # x11(width = 14, height = 30) # For Linux or cross-platform # Draw the heatmap #draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right") write.csv(gsva_res_combined, './outs/GSVA_by_celltype_summarized/gsva_res_combined_imm_bone_top_50_for_10.csv') ```