---
title: "10.GSVA"
output: html_document
date: "2024-11-21"
---
# Load necessary libraries
```{r}
library(DESeq2)
library(GSVA)
library(limma)
library(ComplexHeatmap)
library(EnhancedVolcano)
library(GSEABase)
library(circlize)
# Function to read GMT file
readGMT <- function(file) {
getGmt(file)
}
```
```{r}
# Normalizing the count data
DESeq <- readRDS("/Volumes/Fengshuo_14T/Lab/Yunfeng/cancer_comparsions/Zenodo/data/DESeq2_obj_archetype_comparsion/Mono.DESeq.rds")
DESeq <- estimateSizeFactors(DESeq)
norm_counts <- counts(DESeq, normalized = TRUE)
# Extract the normalized expression matrix
exprs_data <- assay(DESeq)
```
```{r}
# Step 2: Run GSVA
gene_sets <- readGMT("./data/msigdb_v2023.2.Hs_GMTs/msigdb.v2023.2.Hs.symbols.gmt") # specify the path to your .gmt file
#gene_sets <- readGMT("~/OneDrive - Baylor College of Medicine/Lab/Zhan/202308_IIA_combine_ic/data/MsigDB/msigdb_v2023.2.Hs_GMTs/c5.all.v2023.2.Hs.symbols.gmt") # specify the path to your .gmt file
#gene_sets <- readGMT("~/OneDrive - Baylor College of Medicine/Lab/Zhan/202308_IIA_combine_ic/data/MsigDB/msigdb_v2023.2.Hs_GMTs/C5_immune_bone.gmt") # specify the path to your .gmt file
gsva_res <- gsva(exprs_data, gene_sets, method="gsva", min.sz=10, max.sz=500, verbose=TRUE)
saveRDS(gsva_res, './outs/GSVA_tex/gsva_res.all.rds')
```
#Step 1: Set Up the Design Matrix with All Groups
#First, you need to create a design matrix that includes all your groups. Instead of comparing just two groups, we'll set up the design matrix to model each group separately.
```{r}
# Set up the design matrix with all groups (without an intercept)
design <- model.matrix(~ 0 + group.id, data = colData(DESeq))
colnames(design) <- levels(colData(DESeq)$group.id)
```
#Step 2: Fit the Linear Model
#Use the design matrix to fit the linear model with your GSVA results.
```{r}
# Fit the linear model
fit <- lmFit(gsva_res, design)
```
Step 3: Define Contrasts for Multiple Comparisons
Now, you can define contrasts to compare groups. You can perform:
Pairwise comparisons between all groups.
Comparison of each group against the average of the others.
An overall test to identify any gene sets that differ among the groups.
```{r, Overall Test Using F-Statistics}
#For an overall test to see if there are any differences among the groups:
# Apply empirical Bayes moderation
fit2 <- eBayes(fit)
# Get the top gene sets sorted by F-statistic
top_gene_sets <- topTable(fit2, coef = NULL, number = Inf, sort.by = "F")
# View the top gene sets
head(top_gene_sets)
length(rownames(top_gene_sets))
saveRDS(top_gene_sets, './outs/GSVA_tex/top_gene_sets.all.rds')
```
```{r}
library(ComplexHeatmap)
library(circlize)
# Create a sample annotation data frame
sample_annotation <- data.frame(
group.id = colData(DESeq)$group.id
)
rownames(sample_annotation) <- colnames(gsva_res)
# Define colors for each group
group_levels <- unique(sample_annotation$group.id)
group_colors <- setNames(
colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)),
group_levels
)
# Create a HeatmapAnnotation
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(group.id = group_colors)
)
# Select top gene sets for visualization
top_gene_set_names <- rownames(top_gene_sets)[1:20] # Top 20 gene sets
# Create the heatmap
heatmap <- Heatmap(
gsva_res[top_gene_set_names, ],
name = "GSVA Score",
top_annotation = ha,
show_column_names = T,
cluster_rows = TRUE,
cluster_columns = T,
column_title = "Samples",
row_title = "Gene Sets"
)
# Draw the heatmap
draw(heatmap)
length(rownames(top_gene_sets))
```
```{r}
#filtering the pathways:
# Assuming 'top_gene_sets' is your results DataFrame from the limma analysis
# Filter pathways with adjusted p-value less than 0.05
significant_gene_sets <- top_gene_sets[top_gene_sets$adj.P.Val < 0.05, ]
# Check if 'MφOC' column exists in your DataFrame
# If the column name is different, replace 'MφOC' with the correct column name
# Sort the significant pathways by the 'MφOC' column in descending order
ranked_gene_sets <- significant_gene_sets[order(-significant_gene_sets$`TregTex`), ]
# View the first few rows of the ranked gene sets
head(ranked_gene_sets)
length(rownames(ranked_gene_sets))
write.csv(ranked_gene_sets, './outs/GSVA_by_celltype_summarized_2/Tex_ranked_gene_sets.csv')
saveRDS(ranked_gene_sets, './outs/GSVA_by_celltype_summarized_2/Tex_ranked_gene_sets.rds')
```
```{r}
# Select Gene Sets for Plotting
# Specify the number of gene sets to plot
num_gene_sets <- 200
# Get the names of the top gene sets
gene_set_names <- rownames(ranked_gene_sets)[1:num_gene_sets]
# Ensure that you don't exceed the number of available gene sets
gene_set_names <- gene_set_names[!is.na(gene_set_names)]
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Create the sample annotation data frame
sample_annotation <- data.frame(
group.id = colData(DESeq)$group.id
)
rownames(sample_annotation) <- colnames(gsva_res)
# Define colors for each group
group_levels <- unique(sample_annotation$group.id)
group_colors <- setNames(
colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)),
group_levels
)
# Create a HeatmapAnnotation
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(group.id = group_colors)
)
# Subset the gsva_res matrix to include only the selected gene sets
gsva_res_sub <- gsva_res[gene_set_names, ]
# Define color function based on data range
min_score <- min(gsva_res_sub)
max_score <- max(gsva_res_sub)
color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red"))
# Create the heatmap
heatmap <- Heatmap(
gsva_res_sub,
name = "GSVA Score",
top_annotation = ha,
show_column_names = FALSE,
cluster_rows = TRUE,
cluster_columns = TRUE,
column_title = "Samples",
row_title = "Gene Sets",
col = color_fun,
row_names_gp = gpar(fontsize = 6) # Adjust font size as needed
)
# Draw the heatmap
draw(heatmap)
```
```{r}
# Get the names of the gene sets to plot
gene_set_names <- rownames(ranked_gene_sets)
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Create the sample annotation data frame
sample_annotation <- data.frame(
group.id = colData(DESeq)$group.id
)
rownames(sample_annotation) <- colnames(gsva_res)
# Define colors for each group
group_levels <- unique(sample_annotation$group.id)
group_colors <- setNames(
colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)),
group_levels
)
# Create a HeatmapAnnotation
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(group.id = group_colors)
)
# Subset the gsva_res matrix to include only the selected gene sets
gsva_res_sub <- gsva_res[gene_set_names, ]
# Ensure the gene sets are ordered as in 'ranked_gene_sets'
gsva_res_sub <- gsva_res_sub[gene_set_names, ]
# Define color function based on data range
min_score <- min(gsva_res_sub)
max_score <- max(gsva_res_sub)
color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red"))
# Adjust figure size by setting width and height
heatmap <- Heatmap(
gsva_res_sub,
name = "GSVA Score",
top_annotation = ha,
show_column_names = FALSE,
cluster_rows = TRUE,
cluster_columns = TRUE,
column_title = "Samples",
row_title = "Gene Sets",
col = color_fun,
row_names_gp = gpar(fontsize = 10)
)
# Draw the heatmap
draw(heatmap)
```
##joint the celltypes
```{r}
#Step 1: Load and Prepare Data
# Load your ranked_gene_sets data frames
# Replace these with your actual data frames
ranked_gene_sets_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/top_gene_sets.rds")
length(rownames(ranked_gene_sets_macrophage))
ranked_gene_sets_treg <- top_gene_sets # For Treg cells
length(rownames(ranked_gene_sets_treg))
# Find common gene set names
common_gene_sets <- intersect(rownames(ranked_gene_sets_macrophage), rownames(ranked_gene_sets_treg))
length(common_gene_sets)
# Decide how many gene sets to plot
num_gene_sets <- 50 # Adjust as needed
# From the common gene sets, select the top gene sets
# Here, we'll use the order from the macrophage dataset
gene_set_names <- rownames(ranked_gene_sets_macrophage)[rownames(ranked_gene_sets_macrophage) %in% common_gene_sets][1:num_gene_sets]
# Ensure that you don't exceed the number of available gene sets
gene_set_names <- gene_set_names[!is.na(gene_set_names)]
#Step 2: Extract and Combine GSVA Results
#Subset the GSVA results to include only the selected gene sets:
# Subset the GSVA results for the selected gene sets
gsva_res_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5.rds")
gsva_res_macrophage_sub <- gsva_res_macrophage[gene_set_names, ]
gsva_res_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5.rds")
gsva_res_treg_sub <- gsva_res_treg[gene_set_names, ]
#Adjust sample names to include cell type information:
# Append cell type to sample names for uniqueness
colnames(gsva_res_macrophage_sub) <- paste0(colnames(gsva_res_macrophage_sub), "_Macrophage")
colnames(gsva_res_treg_sub) <- paste0(colnames(gsva_res_treg_sub), "_Treg")
#Combine the GSVA results:
# Combine the GSVA results into one matrix
gsva_res_combined <- cbind(gsva_res_macrophage_sub, gsva_res_treg_sub)
#Step 3: Create Sample Annotations
#Create sample annotations for both cell types:
# For macrophage samples
DESeq_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds")
sample_annotation_macrophage <- data.frame(
group.id = colData(DESeq_macrophage)$group.id,
cell.type = "Macrophage"
)
rownames(sample_annotation_macrophage) <- paste0(colnames(gsva_res_macrophage_sub), "_Macrophage")
# For Treg samples
DESeq_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds")
sample_annotation_treg <- data.frame(
group.id = colData(DESeq_treg)$group.id,
cell.type = "Treg"
)
rownames(sample_annotation_treg) <- paste0(colnames(gsva_res_treg_sub), "_Treg")
# Combine the sample annotations
sample_annotation_combined <- rbind(sample_annotation_macrophage, sample_annotation_treg)
#Ensure that the row names of sample_annotation_combined match the column names of gsva_res_combined:
# Check alignment
all(rownames(sample_annotation_combined) == colnames(gsva_res_combined))
# If FALSE, ensure that the sample names are correctly matched
#Step 4: Plot the Heatmap
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Define colors for each group
group_levels <- unique(sample_annotation_combined$group.id)
group_colors <- setNames(
colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)),
group_levels
)
# Define colors for cell types
cell_type_levels <- unique(sample_annotation_combined$cell.type)
cell_type_colors <- setNames(
colorRampPalette(brewer.pal(8, "Pastel1"))(length(cell_type_levels)),
cell_type_levels
)
# Create the HeatmapAnnotation
ha <- HeatmapAnnotation(
df = sample_annotation_combined,
col = list(
group.id = group_colors,
cell.type = cell_type_colors
),
annotation_legend_param = list(
group.id = list(title = "Group ID"),
cell.type = list(title = "Cell Type")
)
)
# Define color function based on data range
min_score <- min(gsva_res_combined)
max_score <- max(gsva_res_combined)
color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red"))
# Create the heatmap
heatmap <- Heatmap(
gsva_res_combined,
name = "GSVA Score",
top_annotation = ha,
show_column_names = FALSE,
cluster_rows = TRUE,
cluster_columns = TRUE,
column_title = "Samples",
row_title = "Gene Sets",
col = color_fun,
row_names_gp = gpar(fontsize = 8), # Adjust font size as needed
column_split = sample_annotation_combined$cell.type, # Separate columns by cell type
column_title_gp = gpar(fontsize = 12, fontface = "bold")
)
# Draw the heatmap
draw(heatmap)
```
```{r}
# Load necessary libraries
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Step 1: Load and Prepare Data
# Load your GSVA results for both cell types
gsva_res_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds")
gsva_res_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5_immune_bone.rds")
# Load DESeq datasets for both cell types
DESeq_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds")
DESeq_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds")
# Process sample names to extract sample IDs
# For macrophage samples
colnames(gsva_res_macrophage) <- tolower(colnames(gsva_res_macrophage))
colnames(gsva_res_macrophage) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_macrophage))
colnames(gsva_res_macrophage) <- sapply(strsplit(colnames(gsva_res_macrophage), "_"), function(x) tail(x, 1))
colnames(DESeq_macrophage) <- tolower(colnames(DESeq_macrophage))
colnames(DESeq_macrophage) <- gsub("[^a-z0-9_]", "", colnames(DESeq_macrophage))
colData(DESeq_macrophage)$sample_id <- sapply(strsplit(colnames(DESeq_macrophage), "_"), function(x) tail(x, 1))
# For Treg samples
colnames(gsva_res_treg) <- tolower(colnames(gsva_res_treg))
colnames(gsva_res_treg) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_treg))
colnames(gsva_res_treg) <- gsub(" ", "_", colnames(gsva_res_treg))
colnames(gsva_res_treg) <- sapply(strsplit(colnames(gsva_res_treg), "_"), function(x) tail(x, 1))
colnames(DESeq_treg) <- tolower(colnames(DESeq_treg))
colnames(DESeq_treg) <- gsub("[^a-z0-9_]", "", colnames(DESeq_treg))
colData(DESeq_treg)$sample_id <- sapply(strsplit(colnames(DESeq_treg), "_"), function(x) tail(x, 1))
# Find common samples among both datasets and DESeq sample IDs
common_samples <- Reduce(intersect, list(
colnames(gsva_res_macrophage),
colnames(gsva_res_treg),
colData(DESeq_macrophage)$sample_id,
colData(DESeq_treg)$sample_id
))
# Subset the GSVA results to include only common samples
gsva_res_macrophage_sub <- gsva_res_macrophage[, common_samples]
gsva_res_treg_sub <- gsva_res_treg[, common_samples]
# Step 2: Select Top 20 Pathways Enriched in Each Cell Type
# Get sample IDs for dominant groups
moc_samples <- colData(DESeq_macrophage)$sample_id[colData(DESeq_macrophage)$group.id == "MφOC"]
moc_samples <- intersect(moc_samples, common_samples)
tregtex_samples <- colData(DESeq_treg)$sample_id[colData(DESeq_treg)$group.id == "TregTex"]
tregtex_samples <- intersect(tregtex_samples, common_samples)
# Compute mean GSVA scores for each pathway
mean_gsva_macrophage <- rowMeans(gsva_res_macrophage[, moc_samples, drop = FALSE], na.rm = TRUE)
mean_gsva_treg <- rowMeans(gsva_res_treg[, tregtex_samples, drop = FALSE], na.rm = TRUE)
# Select top 20 pathways for each cell type
top20_macrophage <- names(sort(mean_gsva_macrophage, decreasing = TRUE))[1:10]
top20_treg <- names(sort(mean_gsva_treg, decreasing = TRUE))[1:10]
# Combine pathways
gene_set_names <- unique(c(top20_macrophage, top20_treg))
# Subset GSVA results for selected pathways and common samples
gsva_res_macrophage_sub <- gsva_res_macrophage[gene_set_names, common_samples]
gsva_res_treg_sub <- gsva_res_treg[gene_set_names, common_samples]
# Combine GSVA results into one matrix
gsva_res_combined <- rbind(
gsva_res_macrophage_sub[top20_macrophage, ],
gsva_res_treg_sub[top20_treg, ]
)
# Create pathway_cell_type vector
pathway_cell_type <- c(rep("Macrophage", length(top20_macrophage)), rep("Treg", length(top20_treg)))
# Step 3: Create Sample Annotations
# For common samples
sample_annotation <- data.frame(
group.id = colData(DESeq_macrophage)$group.id[match(common_samples, colData(DESeq_macrophage)$sample_id)],
stringsAsFactors = FALSE
)
rownames(sample_annotation) <- common_samples
# Define colors for group.id
group_levels <- unique(sample_annotation$group.id)
group_colors <- setNames(
colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)),
group_levels
)
# Create HeatmapAnnotation
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(group.id = group_colors),
annotation_legend_param = list(group.id = list(title = "Group ID"))
)
# Define color function based on data range
min_score <- min(gsva_res_combined, na.rm = TRUE)
max_score <- max(gsva_res_combined, na.rm = TRUE)
color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red"))
# Step 4: Plot the Heatmap
heatmap <- Heatmap(
gsva_res_combined,
name = "GSVA Score",
top_annotation = ha,
show_column_names = FALSE,
cluster_columns = TRUE,
cluster_rows = TRUE,
column_title = "Samples",
row_title = "Pathways",
col = color_fun,
row_names_gp = gpar(fontsize = 8),
row_split = pathway_cell_type, # Split rows by cell type
cluster_column_slices = FALSE, # Cluster columns globally
cluster_row_slices = FALSE, # Cluster rows within each cell type
row_gap = unit(2, "mm")
)
# Draw the heatmap
draw(heatmap)
```
```{r}
# Load necessary libraries
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Step 1: Load and Prepare Data
# Load your GSVA results for both cell types
gsva_res_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds")
gsva_res_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds")
# Load DESeq datasets for both cell types
DESeq_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds")
DESeq_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds")
# Process sample names to extract sample IDs
# For macrophage samples
colnames(gsva_res_macrophage) <- tolower(colnames(gsva_res_macrophage))
colnames(gsva_res_macrophage) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_macrophage))
colnames(gsva_res_macrophage) <- sapply(strsplit(colnames(gsva_res_macrophage), "_"), function(x) tail(x, 1))
colnames(DESeq_macrophage) <- tolower(colnames(DESeq_macrophage))
colnames(DESeq_macrophage) <- gsub("[^a-z0-9_]", "", colnames(DESeq_macrophage))
colData(DESeq_macrophage)$sample_id <- sapply(strsplit(colnames(DESeq_macrophage), "_"), function(x) tail(x, 1))
# For Treg samples
colnames(gsva_res_treg) <- tolower(colnames(gsva_res_treg))
colnames(gsva_res_treg) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_treg))
colnames(gsva_res_treg) <- gsub(" ", "_", colnames(gsva_res_treg))
colnames(gsva_res_treg) <- sapply(strsplit(colnames(gsva_res_treg), "_"), function(x) tail(x, 1))
colnames(DESeq_treg) <- tolower(colnames(DESeq_treg))
colnames(DESeq_treg) <- gsub("[^a-z0-9_]", "", colnames(DESeq_treg))
colData(DESeq_treg)$sample_id <- sapply(strsplit(colnames(DESeq_treg), "_"), function(x) tail(x, 1))
# Find common samples among both datasets and DESeq sample IDs
common_samples <- Reduce(intersect, list(
colnames(gsva_res_macrophage),
colnames(gsva_res_treg),
colData(DESeq_macrophage)$sample_id,
colData(DESeq_treg)$sample_id
))
# Subset the GSVA results to include only common samples
gsva_res_macrophage_sub <- gsva_res_macrophage[, common_samples]
gsva_res_treg_sub <- gsva_res_treg[, common_samples]
# Step 2: Select Top 20 Pathways Enriched in Each Cell Type
# Get sample IDs for dominant groups
moc_samples <- colData(DESeq_macrophage)$sample_id[colData(DESeq_macrophage)$group.id == "MφOC"]
moc_samples <- intersect(moc_samples, common_samples)
tregtex_samples <- colData(DESeq_treg)$sample_id[colData(DESeq_treg)$group.id == "TregTex"]
tregtex_samples <- intersect(tregtex_samples, common_samples)
# Compute mean GSVA scores for each pathway
mean_gsva_macrophage <- rowMeans(gsva_res_macrophage[, moc_samples, drop = FALSE], na.rm = TRUE)
mean_gsva_treg <- rowMeans(gsva_res_treg[, tregtex_samples, drop = FALSE], na.rm = TRUE)
# Select top 20 pathways for each cell type
top20_macrophage <- names(sort(mean_gsva_macrophage, decreasing = TRUE))[1:30]
top20_treg <- names(sort(mean_gsva_treg, decreasing = TRUE))[1:30]
# Combine pathways
gene_set_names <- unique(c(top20_macrophage, top20_treg))
# Subset GSVA results for selected pathways and common samples
gsva_res_macrophage_sub <- gsva_res_macrophage[gene_set_names, common_samples]
gsva_res_treg_sub <- gsva_res_treg[gene_set_names, common_samples]
# Combine GSVA results into one matrix
gsva_res_combined <- rbind(
gsva_res_macrophage_sub[top20_macrophage, ],
gsva_res_treg_sub[top20_treg, ]
)
# Create pathway_cell_type vector
pathway_cell_type <- c(rep("Macrophage", length(top20_macrophage)), rep("Treg", length(top20_treg)))
# Step 3: Create Sample Annotations
# For common samples
sample_annotation <- data.frame(
group.id = colData(DESeq_macrophage)$group.id[match(common_samples, colData(DESeq_macrophage)$sample_id)],
stringsAsFactors = FALSE
)
rownames(sample_annotation) <- common_samples
# Define colors for group.id
group_levels <- unique(sample_annotation$group.id)
group_colors <- setNames(
colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)),
group_levels
)
# Create HeatmapAnnotation
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(group.id = group_colors),
annotation_legend_param = list(group.id = list(title = "Group ID"))
)
# Define color function based on data range
min_score <- min(gsva_res_combined, na.rm = TRUE)
max_score <- max(gsva_res_combined, na.rm = TRUE)
color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red"))
# Step 4: Plot the Heatmap
heatmap <- Heatmap(
gsva_res_combined,
name = "GSVA Score",
top_annotation = ha,
show_column_names = TRUE,
cluster_columns = TRUE,
cluster_rows = TRUE,
column_title = "Samples",
row_title = "Pathways",
col = color_fun,
row_names_gp = gpar(fontsize = 8),
row_split = pathway_cell_type, # Split rows by cell type
cluster_column_slices = FALSE, # Cluster columns globally
cluster_row_slices = FALSE
)
# Draw the heatmap
draw(heatmap)
```
```{r}
# Load necessary libraries
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Step 1: Load and Prepare Data
# Load your GSVA results for both cell types
gsva_res_mono <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds")
gsva_res_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds")
gsva_res_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5_immune_bone.rds")
# Load DESeq datasets for both cell types
DESeq_mono <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds")
DESeq_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds")
DESeq_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds")
# Process sample names to extract sample IDs
# For mono samples
colnames(gsva_res_mono) <- tolower(colnames(gsva_res_mono))
colnames(gsva_res_mono) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_mono))
colnames(gsva_res_mono) <- sapply(strsplit(colnames(gsva_res_mono), "_"), function(x) tail(x, 1))
colnames(DESeq_mono) <- tolower(colnames(DESeq_mono))
colnames(DESeq_mono) <- gsub("[^a-z0-9_]", "", colnames(DESeq_mono))
colData(DESeq_mono)$sample_id <- sapply(strsplit(colnames(DESeq_mono), "_"), function(x) tail(x, 1))
# For macrophage samples
colnames(gsva_res_macrophage) <- tolower(colnames(gsva_res_macrophage))
colnames(gsva_res_macrophage) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_macrophage))
colnames(gsva_res_macrophage) <- sapply(strsplit(colnames(gsva_res_macrophage), "_"), function(x) tail(x, 1))
colnames(DESeq_macrophage) <- tolower(colnames(DESeq_macrophage))
colnames(DESeq_macrophage) <- gsub("[^a-z0-9_]", "", colnames(DESeq_macrophage))
colData(DESeq_macrophage)$sample_id <- sapply(strsplit(colnames(DESeq_macrophage), "_"), function(x) tail(x, 1))
# For Treg samples
colnames(gsva_res_treg) <- tolower(colnames(gsva_res_treg))
colnames(gsva_res_treg) <- gsub("[^a-z0-9_]", "", colnames(gsva_res_treg))
colnames(gsva_res_treg) <- gsub(" ", "_", colnames(gsva_res_treg))
colnames(gsva_res_treg) <- sapply(strsplit(colnames(gsva_res_treg), "_"), function(x) tail(x, 1))
colnames(DESeq_treg) <- tolower(colnames(DESeq_treg))
colnames(DESeq_treg) <- gsub("[^a-z0-9_]", "", colnames(DESeq_treg))
colData(DESeq_treg)$sample_id <- sapply(strsplit(colnames(DESeq_treg), "_"), function(x) tail(x, 1))
# Find common samples among both datasets and DESeq sample IDs
common_samples <- Reduce(intersect, list(
colnames(gsva_res_mono),
colnames(gsva_res_macrophage),
colnames(gsva_res_treg),
colData(DESeq_mono)$sample_id,
colData(DESeq_macrophage)$sample_id,
colData(DESeq_treg)$sample_id
))
# Subset the GSVA results to include only common samples
gsva_res_mono_sub <- gsva_res_mono[, common_samples]
gsva_res_macrophage_sub <- gsva_res_macrophage[, common_samples]
gsva_res_treg_sub <- gsva_res_treg[, common_samples]
# Step 2: Select Top 20 Pathways Enriched in Each Cell Type
# Get sample IDs for dominant groups
mon_samples <- colData(DESeq_mono)$sample_id[colData(DESeq_mono)$group.id == "Mono"]
mon_samples <- intersect(mon_samples, common_samples)
moc_samples <- colData(DESeq_macrophage)$sample_id[colData(DESeq_macrophage)$group.id == "MφOC"]
moc_samples <- intersect(moc_samples, common_samples)
tregtex_samples <- colData(DESeq_treg)$sample_id[colData(DESeq_treg)$group.id == "TregTex"]
tregtex_samples <- intersect(tregtex_samples, common_samples)
# Compute mean GSVA scores for each pathway
mean_gsva_mono <- rowMeans(gsva_res_mono[, mon_samples, drop = FALSE], na.rm = TRUE)
mean_gsva_macrophage <- rowMeans(gsva_res_macrophage[, moc_samples, drop = FALSE], na.rm = TRUE)
mean_gsva_treg <- rowMeans(gsva_res_treg[, tregtex_samples, drop = FALSE], na.rm = TRUE)
# Select top 20 pathways for each cell type
top20_mono <- names(sort(mean_gsva_mono, decreasing = TRUE))[1:30]
top20_macrophage <- names(sort(mean_gsva_macrophage, decreasing = TRUE))[1:30]
top20_treg <- names(sort(mean_gsva_treg, decreasing = TRUE))[1:30]
# Combine pathways
gene_set_names <- unique(c(top20_mono, top20_macrophage, top20_treg))
# Subset GSVA results for selected pathways and common samples
gsva_res_mono_sub <- gsva_res_mono[gene_set_names, common_samples]
gsva_res_macrophage_sub <- gsva_res_macrophage[gene_set_names, common_samples]
gsva_res_treg_sub <- gsva_res_treg[gene_set_names, common_samples]
# Combine GSVA results into one matrix
gsva_res_combined <- rbind(
gsva_res_mono_sub[top20_mono, ],
gsva_res_macrophage_sub[top20_macrophage, ],
gsva_res_treg_sub[top20_treg, ]
)
# Create pathway_cell_type vector
pathway_cell_type <- c(rep("Monocyte", length(top20_mono)), rep("Macrophage", length(top20_macrophage)), rep("Treg", length(top20_treg)))
# Step 3: Create Sample Annotations
# For common samples
sample_annotation <- data.frame(
group.id = colData(DESeq_macrophage)$group.id[match(common_samples, colData(DESeq_macrophage)$sample_id)],
stringsAsFactors = FALSE
)
rownames(sample_annotation) <- common_samples
# Define colors for group.id
group_levels <- unique(sample_annotation$group.id)
group_colors <- setNames(
colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)),
group_levels
)
# Create HeatmapAnnotation
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(group.id = group_colors),
annotation_legend_param = list(group.id = list(title = "Group ID"))
)
# Define color function based on data range
min_score <- min(gsva_res_combined, na.rm = TRUE)
max_score <- max(gsva_res_combined, na.rm = TRUE)
color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red"))
# Step 4: Plot the Heatmap
heatmap <- Heatmap(
gsva_res_combined,
name = "GSVA Score",
top_annotation = ha,
show_column_names = T,
cluster_columns = TRUE,
cluster_rows = TRUE,
column_title = "Samples",
row_title = "Pathways",
col = color_fun,
row_names_gp = gpar(fontsize = 8),
row_split = pathway_cell_type, # Split rows by cell type
cluster_column_slices = FALSE, # Cluster columns globally
cluster_row_slices = FALSE, # Cluster rows within each cell type
row_gap = unit(2, "mm")
)
# Draw the heatmap
draw(heatmap)
```
```{r}
# Load necessary libraries
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Step 1: Load and Prepare Data
# Load your GSVA results for all cell types
gsva_res_mono <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds")
gsva_res_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds")
gsva_res_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5_immune_bone.rds")
# Load DESeq datasets for all cell types
DESeq_mono <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds")
DESeq_macrophage <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds")
DESeq_treg <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds")
# Process sample names to extract sample IDs
process_sample_names <- function(gsva_res, DESeq_obj) {
colnames(gsva_res) <- tolower(colnames(gsva_res))
colnames(gsva_res) <- gsub("[^a-z0-9_]", "", colnames(gsva_res))
colnames(gsva_res) <- sapply(strsplit(colnames(gsva_res), "_"), function(x) tail(x, 1))
colnames(DESeq_obj) <- tolower(colnames(DESeq_obj))
colnames(DESeq_obj) <- gsub("[^a-z0-9_]", "", colnames(DESeq_obj))
colData(DESeq_obj)$sample_id <- sapply(strsplit(colnames(DESeq_obj), "_"), function(x) tail(x, 1))
list(gsva_res = gsva_res, DESeq_obj = DESeq_obj)
}
# Process samples for each cell type
processed_mono <- process_sample_names(gsva_res_mono, DESeq_mono)
gsva_res_mono <- processed_mono$gsva_res
DESeq_mono <- processed_mono$DESeq_obj
processed_macrophage <- process_sample_names(gsva_res_macrophage, DESeq_macrophage)
gsva_res_macrophage <- processed_macrophage$gsva_res
DESeq_macrophage <- processed_macrophage$DESeq_obj
processed_treg <- process_sample_names(gsva_res_treg, DESeq_treg)
gsva_res_treg <- processed_treg$gsva_res
DESeq_treg <- processed_treg$DESeq_obj
# Find common samples among all datasets and DESeq sample IDs
common_samples <- Reduce(intersect, list(
colnames(gsva_res_mono),
colnames(gsva_res_macrophage),
colnames(gsva_res_treg),
colData(DESeq_mono)$sample_id,
colData(DESeq_macrophage)$sample_id,
colData(DESeq_treg)$sample_id
))
# Subset the GSVA results to include only common samples
gsva_res_mono <- gsva_res_mono[, common_samples]
gsva_res_macrophage <- gsva_res_macrophage[, common_samples]
gsva_res_treg <- gsva_res_treg[, common_samples]
# Step 2: Select Top 30 Pathways Enriched in Each Cell Type
# Get sample IDs for dominant groups
mon_samples <- colData(DESeq_mono)$sample_id[colData(DESeq_mono)$group.id == "Mono"]
mon_samples <- intersect(mon_samples, common_samples)
moc_samples <- colData(DESeq_macrophage)$sample_id[colData(DESeq_macrophage)$group.id == "MφOC"]
moc_samples <- intersect(moc_samples, common_samples)
tregtex_samples <- colData(DESeq_treg)$sample_id[colData(DESeq_treg)$group.id == "TregTex"]
tregtex_samples <- intersect(tregtex_samples, common_samples)
# Compute mean GSVA scores for each pathway
mean_gsva_mono <- rowMeans(gsva_res_mono[, mon_samples, drop = FALSE], na.rm = TRUE)
mean_gsva_macrophage <- rowMeans(gsva_res_macrophage[, moc_samples, drop = FALSE], na.rm = TRUE)
mean_gsva_treg <- rowMeans(gsva_res_treg[, tregtex_samples, drop = FALSE], na.rm = TRUE)
# Select top 30 pathways for each cell type
top30_mono <- names(sort(mean_gsva_mono, decreasing = TRUE))[1:30]
top30_macrophage <- names(sort(mean_gsva_macrophage, decreasing = TRUE))[1:30]
top30_treg <- names(sort(mean_gsva_treg, decreasing = TRUE))[1:30]
# Combine pathways
gene_set_names <- unique(c(top30_mono, top30_macrophage, top30_treg))
# **Adjust code to prevent subsetting error**
# Subset gene_set_names to those present in each gsva_res object
gene_set_names_mono <- intersect(gene_set_names, rownames(gsva_res_mono))
gene_set_names_macrophage <- intersect(gene_set_names, rownames(gsva_res_macrophage))
gene_set_names_treg <- intersect(gene_set_names, rownames(gsva_res_treg))
# Subset GSVA results for selected pathways and common samples
gsva_res_mono_sub <- gsva_res_mono[gene_set_names_mono, common_samples]
gsva_res_macrophage_sub <- gsva_res_macrophage[gene_set_names_macrophage, common_samples]
gsva_res_treg_sub <- gsva_res_treg[gene_set_names_treg, common_samples]
# Combine GSVA results into one matrix
gsva_res_combined <- rbind(
gsva_res_mono_sub,
gsva_res_macrophage_sub,
gsva_res_treg_sub
)
# Create pathway_cell_type vector
pathway_cell_type <- c(
rep("Monocyte", nrow(gsva_res_mono_sub)),
rep("Macrophage", nrow(gsva_res_macrophage_sub)),
rep("Treg", nrow(gsva_res_treg_sub))
)
# Step 3: Create Sample Annotations
# For common samples
sample_annotation <- data.frame(
group.id = colData(DESeq_macrophage)$group.id[match(common_samples, colData(DESeq_macrophage)$sample_id)],
stringsAsFactors = FALSE
)
rownames(sample_annotation) <- common_samples
# Define colors for group.id
group_levels <- unique(sample_annotation$group.id)
group_colors <- setNames(
colorRampPalette(brewer.pal(8, "Set1"))(length(group_levels)),
group_levels
)
# Create HeatmapAnnotation
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(group.id = group_colors),
annotation_legend_param = list(group.id = list(title = "Group ID"))
)
# Define color function based on data range
min_score <- min(gsva_res_combined, na.rm = TRUE)
max_score <- max(gsva_res_combined, na.rm = TRUE)
color_fun <- colorRamp2(c(min_score, 0, max_score), c("blue", "white", "red"))
# Step 4: Plot the Heatmap
heatmap <- Heatmap(
gsva_res_combined,
name = "GSVA Score",
top_annotation = ha,
show_column_names = TRUE,
cluster_columns = TRUE,
cluster_rows = TRUE,
column_title = "Samples",
row_title = "Pathways",
col = color_fun,
row_names_gp = gpar(fontsize = 8),
row_split = pathway_cell_type, # Split rows by cell type
cluster_column_slices = FALSE, # Cluster columns globally
cluster_row_slices = FALSE, # Cluster rows within each cell type
row_gap = unit(2, "mm")
)
# Draw the heatmap
draw(heatmap)
```
```{r}
# Load necessary libraries
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Step 1: Define Cell Types and Their Dominant Groups
cell_types <- c("Monocyte", "Macrophage", "Treg", "OC", "Tex")
dominant_groups <- c("Mono", "MφOC", "TregTex", "OC", "Tex")
# Step 2: Specify File Paths for GSVA and DESeq Results
gsva_files <- list(
"Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds",
"Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs//gsva_res.C5_immune_bone.rds",
"Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds",
"OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds",
"Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds"
)
deseq_files <- list(
"Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds",
"Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds",
"Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds",
"OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds",
"Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds"
)
# Step 3: Initialize Lists
gsva_results <- list()
deseq_data <- list()
# Helper Function: Clean and Extract Sample IDs
extract_sample_id <- function(colnames_vector) {
cleaned_names <- tolower(colnames_vector)
cleaned_names <- gsub("[^a-z0-9_]", "", cleaned_names)
sample_ids <- sapply(strsplit(cleaned_names, "_"), function(x) tail(x, 1))
return(sample_ids)
}
# Step 4: Load Data
for (i in seq_along(cell_types)) {
cell_type <- cell_types[i]
gsva_file <- gsva_files[[cell_type]]
deseq_file <- deseq_files[[cell_type]]
if (file.exists(gsva_file)) {
gsva_res <- readRDS(gsva_file)
colnames(gsva_res) <- extract_sample_id(colnames(gsva_res))
gsva_results[[cell_type]] <- gsva_res
}
if (file.exists(deseq_file)) {
deseq_res <- readRDS(deseq_file)
colnames(deseq_res) <- extract_sample_id(colnames(deseq_res))
colData(deseq_res)$sample_id <- extract_sample_id(colnames(deseq_res))
deseq_data[[cell_type]] <- deseq_res
}
}
# Step 5: Identify Common Samples
common_samples <- Reduce(intersect, c(
lapply(gsva_results, colnames),
lapply(deseq_data, function(x) colData(x)$sample_id)
))
# Step 6: Exclude "ctrl" Samples
sample_group_ids <- data.frame(sample_id = common_samples, group.id = NA, stringsAsFactors = FALSE)
for (cell_type in cell_types) {
if (cell_type %in% names(deseq_data)) {
deseq_sub <- deseq_data[[cell_type]]
group_id <- deseq_sub$group.id
names(group_id) <- deseq_sub$sample_id
samples_in_type <- intersect(names(group_id), sample_group_ids$sample_id)
sample_group_ids$group.id[sample_group_ids$sample_id %in% samples_in_type] <- group_id[samples_in_type]
}
}
non_ctrl_samples <- sample_group_ids$sample_id[sample_group_ids$group.id != "ctrl" & !is.na(sample_group_ids$group.id)]
gsva_res_final <- lapply(gsva_results, function(x) x[, non_ctrl_samples, drop = FALSE])
# Step 7: Select Top Pathways
gsva_top_pathways <- list()
pathway_cell_type <- c()
for (i in seq_along(cell_types)) {
cell_type <- cell_types[i]
if (cell_type %in% names(gsva_res_final)) {
gsva_res_current <- gsva_res_final[[cell_type]]
mean_gsva <- rowMeans(gsva_res_current, na.rm = TRUE)
top_pathways <- names(sort(mean_gsva, decreasing = TRUE))[1:10]
gsva_top_pathways[[cell_type]] <- gsva_res_current[top_pathways, , drop = FALSE]
pathway_cell_type <- c(pathway_cell_type, rep(cell_type, length(top_pathways)))
}
}
gsva_res_combined <- do.call(rbind, gsva_top_pathways)
# Ensure Unique Row Names
rownames(gsva_res_combined) <- make.unique(rownames(gsva_res_combined))
# Step 8: Create Annotations
pathways_df <- data.frame(
pathway = rownames(gsva_res_combined),
cell_type = factor(pathway_cell_type, levels = cell_types),
stringsAsFactors = FALSE
)
sample_annotation <- data.frame(
group.id = sample_group_ids$group.id[match(colnames(gsva_res_combined), sample_group_ids$sample_id)],
stringsAsFactors = FALSE
)
rownames(sample_annotation) <- colnames(gsva_res_combined)
group_colors <- setNames(
brewer.pal(min(8, length(unique(sample_annotation$group.id))), "Set1"),
unique(sample_annotation$group.id)
)
cell_type_colors <- setNames(
brewer.pal(min(8, length(unique(pathways_df$cell_type))), "Set2"),
unique(pathways_df$cell_type)
)
# Step 9: Generate Heatmap
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(group.id = group_colors),
annotation_legend_param = list(group.id = list(title = "Group ID"))
)
heatmap <- Heatmap(
gsva_res_combined,
name = "GSVA Score",
top_annotation = ha,
row_title = "Pathways",
column_title = "Samples",
col = colorRamp2(c(min(gsva_res_combined), 0, max(gsva_res_combined)), c("blue", "white", "red")),
row_split = pathways_df$cell_type
)
# Draw the Heatmap
draw(heatmap)
```
```{r}
# Load necessary libraries
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Step 1: Define Cell Types and Their Dominant Groups
cell_types <- c("Monocyte", "Macrophage", "Treg", "OC", "Tex")
dominant_groups <- c("Mono", "MφOC", "TregTex", "MφOC", "TregTex") # Corrected mapping
# Create a named vector for easy mapping
group_mapping <- setNames(dominant_groups, cell_types)
# Step 2: Specify File Paths for GSVA and DESeq Results
gsva_files <- list(
"Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds",
"Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds",
"Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds",
"OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds",
"Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds"
)
deseq_files <- list(
"Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds",
"Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds",
"Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds",
"OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds",
"Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds"
)
# Step 3: Initialize Lists
gsva_results <- list()
deseq_data <- list()
# Helper Function: Clean and Extract Sample IDs
extract_sample_id <- function(colnames_vector) {
# Modify this function based on the actual sample naming convention
cleaned_names <- tolower(colnames_vector)
cleaned_names <- gsub("[^a-z0-9_]", "", cleaned_names)
sample_ids <- sapply(strsplit(cleaned_names, "_"), function(x) tail(x, 1))
return(sample_ids)
}
# Step 4: Load Data with Debugging
for (i in seq_along(cell_types)) {
cell_type <- cell_types[i]
gsva_file <- gsva_files[[cell_type]]
deseq_file <- deseq_files[[cell_type]]
cat("Processing Cell Type:", cell_type, "\n")
# Load GSVA data
if (file.exists(gsva_file)) {
gsva_res <- readRDS(gsva_file)
original_colnames <- colnames(gsva_res)
extracted_sample_ids <- extract_sample_id(original_colnames)
# Debugging: Print sample names before and after extraction
cat("GSVA Data - Original Sample Names:\n")
print(head(original_colnames))
cat("GSVA Data - Extracted Sample IDs:\n")
print(head(extracted_sample_ids))
# Assign extracted sample IDs
colnames(gsva_res) <- extracted_sample_ids
gsva_results[[cell_type]] <- gsva_res
} else {
warning(paste("GSVA file not found for cell type:", cell_type))
}
# Load DESeq data
if (file.exists(deseq_file)) {
deseq_res <- readRDS(deseq_file)
original_colnames <- colnames(deseq_res)
extracted_sample_ids <- extract_sample_id(original_colnames)
# Debugging: Print sample names before and after extraction
cat("DESeq Data - Original Sample Names:\n")
print(head(original_colnames))
cat("DESeq Data - Extracted Sample IDs:\n")
print(head(extracted_sample_ids))
# Assign extracted sample IDs
colnames(deseq_res) <- extracted_sample_ids
colData(deseq_res)$sample_id <- extracted_sample_ids
deseq_data[[cell_type]] <- deseq_res
} else {
warning(paste("DESeq file not found for cell type:", cell_type))
}
cat("\n") # Add a newline for readability
}
# Step 5: Identify Common Samples
common_samples <- Reduce(intersect, c(
lapply(gsva_results, colnames),
lapply(deseq_data, function(x) as.character(colData(x)$sample_id)) # Ensure sample_id is character
))
cat("Common Samples:\n")
print(common_samples)
if (length(common_samples) == 0) {
stop("No common samples found across all cell types.")
}
# Step 6: Exclude "ctrl" Samples and Assign Cell Types
# Initialize a data frame with sample_id, group.id, and cell_type
sample_group_ids <- data.frame(sample_id = common_samples, group.id = NA, cell_type = NA, stringsAsFactors = FALSE)
for (cell_type in cell_types) {
if (cell_type %in% names(deseq_data)) {
deseq_sub <- deseq_data[[cell_type]]
group_id <- as.character(deseq_sub$group.id) # Convert to character to prevent factor issues
names(group_id) <- deseq_sub$sample_id
# Debugging: Print group_id assignments
cat("DESeq Data - Cell Type:", cell_type, "\n")
cat("Group ID Assignments:\n")
print(head(group_id))
samples_in_type <- intersect(names(group_id), sample_group_ids$sample_id)
sample_group_ids$group.id[sample_group_ids$sample_id %in% samples_in_type] <- group_id[samples_in_type]
sample_group_ids$cell_type[sample_group_ids$sample_id %in% samples_in_type] <- cell_type
}
}
# Debugging: Inspect sample_group_ids
cat("Sample Group Assignments:\n")
print(head(sample_group_ids))
cat("Sample Group Distribution:\n")
print(table(sample_group_ids$cell_type, sample_group_ids$group.id))
# Identify non-control samples
non_ctrl_samples <- sample_group_ids$sample_id[sample_group_ids$group.id != "ctrl" & !is.na(sample_group_ids$group.id)]
cat("Non-Control Samples:\n")
print(non_ctrl_samples)
if (length(non_ctrl_samples) == 0) {
stop("No non-control samples found.")
}
# Subset GSVA results to include only non-control samples
gsva_res_final <- lapply(gsva_results, function(x) x[, non_ctrl_samples, drop = FALSE])
# Step 7: Select Top Pathways Based on Group-Specific Rankings (Common Pathways)
gsva_top_pathways <- list()
pathway_cell_type <- c()
# Identify common pathways across all cell types
common_pathways <- Reduce(intersect, lapply(gsva_res_final, rownames))
cat("Number of Common Pathways:", length(common_pathways), "\n")
if (length(common_pathways) == 0) {
stop("No common pathways found across all cell types.")
}
# Initialize a matrix to store mean GSVA scores per cell type for common pathways
mean_gsva_matrix <- matrix(NA, nrow = length(common_pathways), ncol = length(cell_types))
rownames(mean_gsva_matrix) <- common_pathways
colnames(mean_gsva_matrix) <- cell_types
# Calculate mean GSVA scores for each cell type within its dominant group
for (cell_type in cell_types) {
if (cell_type %in% names(gsva_res_final)) {
gsva_res_current <- gsva_res_final[[cell_type]]
# Get the dominant group for the current cell type
dominant_group <- group_mapping[cell_type]
# Identify samples in gsva_res_current that belong to the dominant group and are of the current cell_type
samples_in_group <- sample_group_ids$sample_id[
sample_group_ids$group.id == dominant_group &
sample_group_ids$cell_type == cell_type
]
# Debugging: Print the number of samples found
cat("Cell Type:", cell_type, "- Dominant Group:", dominant_group, "- Samples Found:", length(samples_in_group), "\n")
# Ensure there are samples in the dominant group
if (length(samples_in_group) == 0) {
warning(paste("No samples found for group", dominant_group, "in cell type:", cell_type))
next
}
# Subset the GSVA results to only include samples from the dominant group
gsva_subset <- gsva_res_current[, samples_in_group, drop = FALSE]
# Check if common pathways are present in this cell type
valid_pathways <- intersect(common_pathways, rownames(gsva_subset))
if (length(valid_pathways) == 0) {
warning(paste("No valid common pathways found for cell type:", cell_type))
next
}
# Compute mean GSVA scores for each pathway within the dominant group
mean_gsva <- rowMeans(gsva_subset[valid_pathways, , drop = FALSE], na.rm = TRUE)
# Store the mean GSVA scores in the matrix
mean_gsva_matrix[valid_pathways, cell_type] <- mean_gsva
}
}
# Remove cell types with no mean GSVA scores
valid_cell_types <- colnames(mean_gsva_matrix)[colSums(!is.na(mean_gsva_matrix)) > 0]
mean_gsva_matrix <- mean_gsva_matrix[, valid_cell_types, drop = FALSE]
cat("Valid Cell Types with Mean GSVA Scores:\n")
print(valid_cell_types)
# Debugging: Check if mean_gsva_matrix has valid data
cat("Mean GSVA Matrix:\n")
print(head(mean_gsva_matrix))
# Calculate the average mean GSVA score across cell types for each pathway
average_mean_gsva <- rowMeans(mean_gsva_matrix, na.rm = TRUE)
# Debugging: Print average_mean_gsva
cat("Average Mean GSVA Scores:\n")
print(head(average_mean_gsva))
# Select the top 30 pathways based on the average mean GSVA scores
n_top <- min(50, length(average_mean_gsva))
top_pathways <- names(sort(average_mean_gsva, decreasing = TRUE))[1:n_top]
# Debugging: Print top pathways
cat("Selected Top Pathways:\n")
print(top_pathways)
# Subset the GSVA results to include only the top pathways
gsva_top_pathways <- lapply(gsva_res_final, function(x) {
valid_top_pathways <- intersect(top_pathways, rownames(x))
if (length(valid_top_pathways) == 0) {
return(NULL)
} else {
return(x[valid_top_pathways, , drop = FALSE])
}
})
# Remove cell types with no valid pathways
gsva_top_pathways <- gsva_top_pathways[!sapply(gsva_top_pathways, is.null)]
# Check if any pathways are left after filtering
if (length(gsva_top_pathways) == 0) {
stop("No valid pathways left after filtering.")
}
# Combine all top pathways into a single matrix
gsva_res_combined <- do.call(rbind, gsva_top_pathways)
# Ensure Unique Row Names
rownames(gsva_res_combined) <- make.unique(rownames(gsva_res_combined))
# Step 8: Create Annotations
pathways_df <- data.frame(
pathway = rownames(gsva_res_combined),
cell_type = factor(rep(valid_cell_types, each = length(top_pathways)), levels = valid_cell_types),
stringsAsFactors = FALSE
)
sample_annotation <- data.frame(
group.id = sample_group_ids$group.id[match(colnames(gsva_res_combined), sample_group_ids$sample_id)],
stringsAsFactors = FALSE
)
rownames(sample_annotation) <- colnames(gsva_res_combined)
# Debugging: Inspect annotations
cat("Sample Annotations:\n")
print(head(sample_annotation))
cat("Pathway Annotations:\n")
print(head(pathways_df))
# Define Colors for Groups
group_ids <- unique(sample_annotation$group.id)
n_groups <- length(group_ids)
# Ensure at least 3 colors for brewer.pal or handle fewer
if (n_groups >= 3) {
group_palette <- brewer.pal(min(8, n_groups), "Set1")
} else if (n_groups == 2) {
group_palette <- c("red", "blue")
} else if (n_groups == 1) {
group_palette <- "red"
}
group_colors <- setNames(
group_palette,
group_ids
)
# Define Colors for Cell Types
cell_types_with_unique <- unique(pathways_df$cell_type)
n_cell_types <- length(cell_types_with_unique)
# Handle color palette based on the number of cell types
if (n_cell_types >= 3) {
cell_type_palette <- brewer.pal(min(8, n_cell_types), "Set2")
} else if (n_cell_types == 2) {
cell_type_palette <- c("lightblue", "lightgreen")
} else if (n_cell_types == 1) {
cell_type_palette <- "lightblue"
}
cell_type_colors <- setNames(
cell_type_palette,
cell_types_with_unique
)
# Add row annotation for cell types
row_annotation <- rowAnnotation(
Cell_Type = pathways_df$cell_type,
col = list(Cell_Type = cell_type_colors),
annotation_legend_param = list(Cell_Type = list(title = "Cell Type"))
)
# Step 9: Generate Heatmap
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(group.id = group_colors),
annotation_legend_param = list(group.id = list(title = "Group ID"))
)
heatmap <- Heatmap(
gsva_res_combined,
name = "GSVA Score",
top_annotation = ha,
right_annotation = row_annotation, # Add row annotation for cell types
row_title = "Pathways",
column_title = "Samples",
col = colorRamp2(c(min(gsva_res_combined, na.rm = TRUE), 0, max(gsva_res_combined, na.rm = TRUE)), c("blue", "white", "red")),
row_split = pathways_df$cell_type,
show_row_names = TRUE,
show_column_names = FALSE
)
# Draw the Heatmap
draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right")
pdf("./outs/GSVA_by_celltype_summarized/gsva_heatmap_all_pathways.50.pdf", width = 14, height = 30) # Width and height in inches
# Draw the heatmap with legends on the right
draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right")
# Close the graphics device
dev.off()
#write.csv(gsva_res_combined, './outs/GSVA_by_celltype_summarized/gsva_heatmap_all_pathways.50.csv')
```
```{r}
# Load necessary libraries
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Step 1: Define Cell Types and Their Dominant Groups
cell_types <- c("Monocyte", "Macrophage", "Treg", "OC", "Tex")
dominant_groups <- c("Mono", "MφOC", "TregTex", "OC", "Tex")
# Step 2: Specify File Paths for GSVA and DESeq Results
gsva_files <- list(
"Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds",
"Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds",
"Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds",
"OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds",
"Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds"
)
deseq_files <- list(
"Monocyte" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds",
"Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds",
"Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds",
"OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds",
"Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds"
)
# Step 3: Initialize Lists
gsva_results <- list()
deseq_data <- list()
# Helper Function: Clean and Extract Sample IDs
extract_sample_id <- function(colnames_vector) {
cleaned_names <- tolower(colnames_vector)
cleaned_names <- gsub("[^a-z0-9_]", "", cleaned_names)
sample_ids <- sapply(strsplit(cleaned_names, "_"), function(x) tail(x, 1))
return(sample_ids)
}
# Step 4: Load Data
for (i in seq_along(cell_types)) {
cell_type <- cell_types[i]
gsva_file <- gsva_files[[cell_type]]
deseq_file <- deseq_files[[cell_type]]
if (file.exists(gsva_file)) {
gsva_res <- readRDS(gsva_file)
colnames(gsva_res) <- extract_sample_id(colnames(gsva_res))
gsva_results[[cell_type]] <- gsva_res
}
if (file.exists(deseq_file)) {
deseq_res <- readRDS(deseq_file)
colnames(deseq_res) <- extract_sample_id(colnames(deseq_res))
colData(deseq_res)$sample_id <- extract_sample_id(colnames(deseq_res))
deseq_data[[cell_type]] <- deseq_res
}
}
# Step 5: Identify Common Samples
common_samples <- Reduce(intersect, c(
lapply(gsva_results, colnames),
lapply(deseq_data, function(x) colData(x)$sample_id)
))
# Step 6: Exclude "ctrl" Samples
sample_group_ids <- data.frame(sample_id = common_samples, group.id = NA, stringsAsFactors = FALSE)
for (cell_type in cell_types) {
if (cell_type %in% names(deseq_data)) {
deseq_sub <- deseq_data[[cell_type]]
group_id <- deseq_sub$group.id
names(group_id) <- deseq_sub$sample_id
samples_in_type <- intersect(names(group_id), sample_group_ids$sample_id)
sample_group_ids$group.id[sample_group_ids$sample_id %in% samples_in_type] <- group_id[samples_in_type]
}
}
non_ctrl_samples <- sample_group_ids$sample_id[sample_group_ids$group.id != "ctrl" & !is.na(sample_group_ids$group.id)]
gsva_res_final <- lapply(gsva_results, function(x) x[, non_ctrl_samples, drop = FALSE])
# Step 7: Select Unique Top Pathways
gsva_top_pathways <- list()
pathway_cell_type <- c()
# Identify all pathways per cell type
pathways_per_cell_type <- lapply(gsva_res_final, rownames)
# Identify unique pathways for each cell type
unique_pathways <- list()
for (cell_type in cell_types) {
other_cell_types <- setdiff(cell_types, cell_type)
other_pathways <- unique(unlist(pathways_per_cell_type[other_cell_types]))
unique_pathways[[cell_type]] <- setdiff(pathways_per_cell_type[[cell_type]], other_pathways)
}
# Select top pathways from the unique set
for (cell_type in cell_types) {
if (cell_type %in% names(gsva_res_final)) {
unique_ps <- unique_pathways[[cell_type]]
if (length(unique_ps) == 0) {
warning(paste("No unique pathways found for cell type:", cell_type))
next
}
gsva_res_current <- gsva_res_final[[cell_type]]
# Filter to unique pathways
gsva_unique <- gsva_res_current[unique_ps, , drop = FALSE]
# Compute mean GSVA score
mean_gsva <- rowMeans(gsva_unique, na.rm = TRUE)
# Determine the number of top pathways to select (up to 10)
n_top <- min(50, length(mean_gsva))
top_pathways <- names(sort(mean_gsva, decreasing = TRUE))[1:n_top]
gsva_top_pathways[[cell_type]] <- gsva_unique[top_pathways, , drop = FALSE]
pathway_cell_type <- c(pathway_cell_type, rep(cell_type, length(top_pathways)))
}
}
# Check if any unique pathways were selected
if (length(gsva_top_pathways) == 0) {
stop("No unique pathways were selected for any cell type.")
}
# Combine all unique top pathways into a single matrix
gsva_res_combined <- do.call(rbind, gsva_top_pathways)
# Ensure Unique Row Names
rownames(gsva_res_combined) <- make.unique(rownames(gsva_res_combined))
# Step 8: Create Annotations
pathways_df <- data.frame(
pathway = rownames(gsva_res_combined),
cell_type = factor(pathway_cell_type, levels = cell_types),
stringsAsFactors = FALSE
)
sample_annotation <- data.frame(
group.id = sample_group_ids$group.id[match(colnames(gsva_res_combined), sample_group_ids$sample_id)],
stringsAsFactors = FALSE
)
rownames(sample_annotation) <- colnames(gsva_res_combined)
# Define Colors for Groups
group_ids <- unique(sample_annotation$group.id)
n_groups <- length(group_ids)
# Ensure at least 3 colors for brewer.pal or handle fewer
if (n_groups >= 3) {
group_palette <- brewer.pal(min(8, n_groups), "Set1")
} else if (n_groups == 2) {
group_palette <- c("red", "blue")
} else if (n_groups == 1) {
group_palette <- "red"
}
group_colors <- setNames(
group_palette,
group_ids
)
# Define Colors for Cell Types
cell_types_with_unique <- unique(pathways_df$cell_type)
n_cell_types <- length(cell_types_with_unique)
# Handle color palette based on the number of cell types
if (n_cell_types >= 3) {
cell_type_palette <- brewer.pal(min(8, n_cell_types), "Set2")
} else if (n_cell_types == 2) {
cell_type_palette <- c("lightblue", "lightgreen")
} else if (n_cell_types == 1) {
cell_type_palette <- "lightblue"
}
cell_type_colors <- setNames(
cell_type_palette,
cell_types_with_unique
)
# Add row annotation for cell types
row_annotation <- rowAnnotation(
Cell_Type = pathways_df$cell_type,
col = list(Cell_Type = cell_type_colors),
annotation_legend_param = list(Cell_Type = list(title = "Cell Type"))
)
# Step 9: Generate Heatmap
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(group.id = group_colors),
annotation_legend_param = list(group.id = list(title = "Group ID"))
)
heatmap <- Heatmap(
gsva_res_combined,
name = "GSVA Score",
top_annotation = ha,
right_annotation = row_annotation, # Add row annotation for cell types
row_title = "Pathways",
column_title = "Samples",
col = colorRamp2(c(min(gsva_res_combined, na.rm = TRUE), 0, max(gsva_res_combined, na.rm = TRUE)), c("blue", "white", "red")),
row_split = pathways_df$cell_type,
show_row_names = TRUE,
show_column_names = FALSE
)
# Draw the Heatmap
draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right")
```
```{r}
# Load necessary libraries
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
library(dplyr)
# Function to normalize pathway names
normalize_pathway_names <- function(pathway_names) {
pathway_names <- toupper(pathway_names)
pathway_names <- trimws(pathway_names)
pathway_names <- gsub("[^A-Z0-9_]", "_", pathway_names)
return(pathway_names)
}
# Step 1: Define Cell Types and Dominant Groups
cell_types <- c("Mono", "Macrophage", "OC", "Treg", "Tex")
dominant_groups <- c("Mono", "MφOC", "MφOC", "TregTex", "TregTex") # 5 elements corresponding to cell_types
# Step 2: Specify File Paths for GSVA and DESeq Datasets
gsva_files <- list(
"Mono" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds",
"Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds",
"OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds",
"Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5_immune_bone.rds",
"Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds"
)
deseq_files <- list(
"Mono" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds",
"Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds",
"OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds",
"Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds",
"Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds"
)
# Step 3: Load GSVA and DESeq Data for Each Cell Type
gsva_results <- list()
deseq_data <- list()
for (i in seq_along(cell_types)) {
cell_type <- cell_types[i]
dominant_group <- dominant_groups[i]
cat("Loading data for cell type:", cell_type, "\n")
# Load GSVA results
gsva_file <- gsva_files[[cell_type]]
if (!file.exists(gsva_file)) {
warning(paste("GSVA file not found for cell type:", cell_type, "at", gsva_file))
next # Skip to the next cell type
}
gsva_res <- readRDS(gsva_file)
# Load DESeq data
deseq_file <- deseq_files[[cell_type]]
if (!file.exists(deseq_file)) {
warning(paste("DESeq file not found for cell type:", cell_type, "at", deseq_file))
next # Skip to the next cell type
}
deseq_res <- readRDS(deseq_file)
# Process sample names for GSVA results
colnames(gsva_res) <- tolower(colnames(gsva_res))
colnames(gsva_res) <- gsub("[^a-z0-9_]", "", colnames(gsva_res))
colnames(gsva_res) <- sapply(strsplit(colnames(gsva_res), "_"), function(x) tail(x, 1))
# Process sample names for DESeq data
colnames(deseq_res) <- tolower(colnames(deseq_res))
colnames(deseq_res) <- gsub("[^a-z0-9_]", "", colnames(deseq_res))
colData(deseq_res)$sample_id <- sapply(strsplit(colnames(deseq_res), "_"), function(x) tail(x, 1))
# Store in lists
gsva_results[[cell_type]] <- gsva_res
deseq_data[[cell_type]] <- deseq_res
}
# Step 4: Find Common Samples Across All Cell Types
cat("\nIdentifying common samples across all cell types...\n")
gsva_sample_lists <- lapply(gsva_results, colnames)
common_samples <- Reduce(intersect, gsva_sample_lists)
# Ensure samples are present in DESeq data for each cell type
for (cell_type in cell_types) {
if (!(cell_type %in% names(deseq_data))) {
warning(paste("DESeq data missing for cell type:", cell_type))
next
}
deseq_sample_ids <- colData(deseq_data[[cell_type]])$sample_id
common_samples <- intersect(common_samples, deseq_sample_ids)
}
# Check if common_samples is non-empty
if (length(common_samples) == 0) {
stop("No common samples found among all cell types.")
} else {
cat("Number of common samples:", length(common_samples), "\n")
}
# Subset the GSVA results to include only common samples
gsva_res_sub <- lapply(gsva_results, function(x) x[, common_samples, drop = FALSE])
# Step 5: Select Top 30 Pathways Enriched in Each Cell Type
cat("\nSelecting top 30 pathways for each cell type based on dominant groups...\n")
gsva_top_pathways <- list()
pathway_cell_type <- c()
for (i in seq_along(cell_types)) {
cell_type <- cell_types[i]
dominant_group <- dominant_groups[i]
cat("\nProcessing cell type:", cell_type, "\n")
# Check if data exists for the cell type
if (!(cell_type %in% names(gsva_res_sub)) | !(cell_type %in% names(deseq_data))) {
warning(paste("Data missing for cell type:", cell_type))
# Add a dummy pathway to create an empty section
dummy_pathway <- paste0("No_Pathways_", cell_type)
gsva_top_pathways[[cell_type]] <- matrix(NA, nrow = 1, ncol = length(common_samples))
rownames(gsva_top_pathways[[cell_type]]) <- dummy_pathway
pathway_cell_type <- c(pathway_cell_type, cell_type)
next # Move to the next cell type
}
gsva_res_current <- gsva_res_sub[[cell_type]]
deseq_current <- deseq_data[[cell_type]]
# Get sample IDs for the dominant group
group_samples <- deseq_current$sample_id[deseq_current$group.id == dominant_group]
group_samples <- intersect(group_samples, common_samples)
cat("Number of samples in dominant group (", dominant_group, "): ", length(group_samples), "\n", sep = "")
# Check if group_samples is empty
if (length(group_samples) == 0) {
warning(paste("No samples found for dominant group", dominant_group, "in cell type", cell_type))
# Add a dummy pathway to create an empty section
dummy_pathway <- paste0("No_Pathways_", cell_type)
gsva_top_pathways[[cell_type]] <- matrix(NA, nrow = 1, ncol = length(common_samples))
rownames(gsva_top_pathways[[cell_type]]) <- dummy_pathway
pathway_cell_type <- c(pathway_cell_type, cell_type)
next # Skip to the next cell type
}
# Compute mean GSVA scores for each pathway
mean_gsva <- rowMeans(gsva_res_current[, group_samples, drop = FALSE], na.rm = TRUE)
# Remove pathways with NA mean scores
mean_gsva <- mean_gsva[!is.na(mean_gsva)]
if (length(mean_gsva) == 0) {
warning(paste("No valid GSVA scores for cell type", cell_type))
# Add a dummy pathway
dummy_pathway <- paste0("No_Pathways_", cell_type)
gsva_top_pathways[[cell_type]] <- matrix(NA, nrow = 1, ncol = length(common_samples))
rownames(gsva_top_pathways[[cell_type]]) <- dummy_pathway
pathway_cell_type <- c(pathway_cell_type, cell_type)
next
}
# Select top 30 pathways
num_pathways <- min(10, length(mean_gsva))
top_pathways <- names(sort(mean_gsva, decreasing = TRUE))[1:num_pathways]
cat("Selected top", length(top_pathways), "pathways for cell type", cell_type, "\n")
# Check if pathways exist in GSVA results
missing_pathways <- setdiff(top_pathways, rownames(gsva_res_current))
if (length(missing_pathways) > 0) {
warning(paste("Some top pathways are missing for cell type", cell_type, ":", paste(missing_pathways, collapse = ", ")))
# Remove missing pathways from top_pathways
top_pathways <- intersect(top_pathways, rownames(gsva_res_current))
}
# If no pathways remain, add dummy
if (length(top_pathways) == 0) {
warning(paste("No valid pathways for cell type", cell_type))
dummy_pathway <- paste0("No_Pathways_", cell_type)
gsva_top_pathways[[cell_type]] <- matrix(NA, nrow = 1, ncol = length(common_samples))
rownames(gsva_top_pathways[[cell_type]]) <- dummy_pathway
pathway_cell_type <- c(pathway_cell_type, cell_type)
next
}
# Subset GSVA results to top pathways
gsva_top_subset <- gsva_res_current[top_pathways, ]
# Store in list
gsva_top_pathways[[cell_type]] <- gsva_top_subset
# Record pathway_cell_type
pathway_cell_type <- c(pathway_cell_type, rep(cell_type, length(top_pathways)))
}
# Step 6: Combine Pathways into One Matrix
cat("\nCombining top pathways from all cell types into a single matrix...\n")
gsva_res_combined <- do.call(rbind, gsva_top_pathways)
# Create a data frame for pathway annotations
pathways_df <- data.frame(
pathway = rownames(gsva_res_combined),
cell_type = pathway_cell_type,
stringsAsFactors = FALSE
)
# Define the order of cell types
cell_type_order <- c("Mono", "Macrophage", "OC", "Treg", "Tex")
# Make cell_type a factor with specified order
pathways_df$cell_type <- factor(pathways_df$cell_type, levels = cell_type_order)
# Arrange pathways by cell type
pathways_df <- pathways_df %>%
arrange(cell_type)
# Reorder gsva_res_combined accordingly
gsva_res_combined <- gsva_res_combined[pathways_df$pathway, ]
# Update pathway_cell_type
pathway_cell_type <- pathways_df$cell_type
# Step 7: Create Sample Annotations
cat("\nCreating sample annotations...\n")
# Create sample annotation data frame using group IDs from "Macrophage"
if (!("Macrophage" %in% names(deseq_data))) {
stop("DESeq data for 'Macrophage' is missing.")
}
sample_annotation <- data.frame(
group.id = deseq_data[["Macrophage"]]$group.id[match(common_samples, deseq_data[["Macrophage"]]$sample_id)],
stringsAsFactors = FALSE
)
rownames(sample_annotation) <- common_samples
# Replace "Mo" with "Mono" in group.id
sample_annotation$group.id[sample_annotation$group.id == "Mo"] <- "Mono"
# Assign "Unknown" to samples not in desired groups without excluding them
desired_groups <- c("Mono", "MφOC", "TregTex")
sample_annotation$group.id <- ifelse(sample_annotation$group.id %in% desired_groups,
sample_annotation$group.id, "Unknown")
# Convert group.id to factor with specified levels, including "Unknown"
desired_groups_extended <- c(desired_groups, "Unknown")
sample_annotation$group.id <- factor(sample_annotation$group.id, levels = desired_groups_extended)
# Define colors for group.id based on levels
group_levels <- levels(sample_annotation$group.id)
num_groups <- length(group_levels)
# Define colors using RColorBrewer's Set1 palette
group_colors <- setNames(
brewer.pal(n = num_groups, name = "Set1"),
group_levels
)
# Assign a specific color to "Unknown" if desired (e.g., grey)
if ("Unknown" %in% group_levels) {
group_colors["Unknown"] <- "grey"
}
# Replace 'MφOC' with 'MphiOC' to handle special character issues (optional)
sample_annotation$group.id <- as.character(sample_annotation$group.id) # Convert to character
sample_annotation$group.id <- gsub("MφOC", "MphiOC", sample_annotation$group.id)
sample_annotation$group.id <- factor(sample_annotation$group.id, levels = c("Mono", "MphiOC", "TregTex", "Unknown"))
# Update group_colors accordingly
group_colors <- setNames(
brewer.pal(n = length(levels(sample_annotation$group.id)), name = "Set1"),
levels(sample_annotation$group.id)
)
# Assign a distinct color to "Unknown" if desired
if ("Unknown" %in% levels(sample_annotation$group.id)) {
group_colors["Unknown"] <- "grey"
}
# Reorder sample_annotation to match gsva_res_combined column order
if (!all(colnames(gsva_res_combined) == rownames(sample_annotation))) {
cat("Reordering sample_annotation to match gsva_res_combined column order...\n")
sample_annotation <- sample_annotation[colnames(gsva_res_combined), , drop = FALSE]
}
# Verify alignment
alignment <- all(colnames(gsva_res_combined) == rownames(sample_annotation))
cat("Do sample_annotation row names match gsva_res_combined column names? ", alignment, "\n")
if (!alignment) {
stop("Mismatch between gsva_res_combined columns and sample_annotation row names.")
} else {
cat("Sample annotations successfully aligned with heatmap columns.\n")
}
# Print group.id levels and colors for verification
cat("Levels of group.id:\n")
print(levels(sample_annotation$group.id))
cat("Group Colors Mapping:\n")
print(group_colors)
# Check for NA values in group.id after handling
na_count <- sum(is.na(sample_annotation$group.id))
cat("Number of NA values in group.id after handling:", na_count, "\n")
# Ensure no NAs remain
if (na_count > 0) {
warning("There are NA values in group.id. Assigning them to 'Unknown'.")
sample_annotation$group.id[is.na(sample_annotation$group.id)] <- "Unknown"
# Re-convert to factor including 'Unknown'
sample_annotation$group.id <- factor(sample_annotation$group.id, levels = c("Mono", "MphiOC", "TregTex", "Unknown"))
}
# Create HeatmapAnnotation for sample annotations
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(
group.id = group_colors
),
annotation_legend_param = list(
group.id = list(title = "Group ID")
)
)
# Step 8: Plot the Heatmap
cat("\nGenerating the heatmap...\n")
# Define color function based on data range and handle NA values
color_fun <- colorRamp2(c(min(gsva_res_combined, na.rm = TRUE),
0,
max(gsva_res_combined, na.rm = TRUE)),
c("blue", "white", "red"))
# Define colors for cell types in row annotations
cell_type_colors <- setNames(
brewer.pal(n = length(cell_type_order), name = "Dark2"),
cell_type_order
)
# Create row annotation for cell types
row_annotation <- rowAnnotation(
Cell_Type = pathway_cell_type,
col = list(Cell_Type = cell_type_colors),
show_annotation_name = TRUE,
annotation_legend_param = list(Cell_Type = list(title = "Cell Type"))
)
# Create the heatmap with row_split and row_annotation
heatmap <- Heatmap(
gsva_res_combined,
name = "GSVA Score",
top_annotation = ha,
right_annotation = row_annotation, # Add row annotations
show_column_names = FALSE,
cluster_columns = TRUE,
cluster_rows = FALSE, # Rows are ordered manually
column_title = "Samples",
row_title = "Pathways",
col = color_fun,
row_names_gp = gpar(fontsize = 8),
row_split = pathway_cell_type, # Split rows by cell type
row_order = order(pathway_cell_type, decreasing = FALSE),
row_gap = unit(2, "mm"),
row_title_rot = 0,
row_title_gp = gpar(fontsize = 10, fontface = "bold"),
na_col = "gray" # Color for NA values (dummy pathways)
)
# Draw the heatmap with legends on the right
draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right")
#write.csv(gsva_res_combined, './outs/GSVA_by_celltype_summarized/gsva_res_combined.csv')
```
```{r}
# Load necessary libraries
library(ComplexHeatmap)
library(circlize)
library(RColorBrewer)
# Step 1: Load and Prepare Data
# Define cell types and their corresponding dominant groups
cell_types <- c("Mono", "Macrophage", "OC", "Treg", "Tex")
dominant_groups <- c("Mono", "MφOC", "MφOC", "TregTex", "TregTex") # 5 elements corresponding to cell_types
# Specify file paths for each cell type's GSVA and DESeq results
gsva_files <- list(
"Mono" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/gsva_res.C5_immune_bone.rds",
"Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/gsva_res.C5_immune_bone.rds",
"OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/gsva_res.C5_immune_bone.rds",
"Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/gsva_res.C5_immune_bone.rds",
"Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/gsva_res.C5_immune_bone.rds"
)
deseq_files <- list(
"Mono" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_mono/DESeq.rds",
"Macrophage" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_macs/DESeq.rds",
"OC" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_oc/DESeq.rds",
"Treg" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_treg/DESeq.rds",
"Tex" = "~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Lab/Yunfeng/cancer_comparsions/pseudobulk/outs/GSVA_tex/DESeq.rds"
)
# Initialize lists to store GSVA results and DESeq data
gsva_results <- list()
deseq_data <- list()
# Load GSVA results and DESeq data for each cell type
for (i in seq_along(cell_types)) {
cell_type <- cell_types[i]
dominant_group <- dominant_groups[i]
cat("Loading data for cell type:", cell_type, "\n")
# Load GSVA results
gsva_file <- gsva_files[[cell_type]]
if (!file.exists(gsva_file)) {
warning(paste("GSVA file not found for cell type:", cell_type, "at", gsva_file))
next # Skip to the next cell type
}
gsva_res <- readRDS(gsva_file)
# Load DESeq data
deseq_file <- deseq_files[[cell_type]]
if (!file.exists(deseq_file)) {
warning(paste("DESeq file not found for cell type:", cell_type, "at", deseq_file))
next # Skip to the next cell type
}
deseq_res <- readRDS(deseq_file)
# Process sample names to extract sample IDs for GSVA results
colnames(gsva_res) <- tolower(colnames(gsva_res))
colnames(gsva_res) <- gsub("[^a-z0-9_]", "", colnames(gsva_res))
colnames(gsva_res) <- sapply(strsplit(colnames(gsva_res), "_"), function(x) tail(x, 1))
# Process sample names to extract sample IDs for DESeq data
colnames(deseq_res) <- tolower(colnames(deseq_res))
colnames(deseq_res) <- gsub("[^a-z0-9_]", "", colnames(deseq_res))
colData(deseq_res)$sample_id <- sapply(strsplit(colnames(deseq_res), "_"), function(x) tail(x, 1))
# Store in lists
gsva_results[[cell_type]] <- gsva_res
deseq_data[[cell_type]] <- deseq_res
}
# Step 2: Find Common Samples
# Identify common samples across all GSVA datasets
gsva_sample_lists <- lapply(gsva_results, colnames)
common_samples <- Reduce(intersect, gsva_sample_lists)
# Ensure samples are present in DESeq data for each cell type
for (cell_type in cell_types) {
if (!(cell_type %in% names(deseq_data))) {
warning(paste("DESeq data missing for cell type:", cell_type))
next
}
deseq_sample_ids <- colData(deseq_data[[cell_type]])$sample_id
common_samples <- intersect(common_samples, deseq_sample_ids)
}
# Check if common_samples is non-empty
if (length(common_samples) == 0) {
stop("No common samples found among all cell types.")
} else {
cat("Number of common samples:", length(common_samples), "\n")
}
# Subset the GSVA results to include only common samples
gsva_res_sub <- lapply(gsva_results, function(x) x[, common_samples, drop = FALSE])
# Step 3: Select Top Pathways Enriched in Each Cell Type
# Initialize lists to store top pathways and pathway annotations
gsva_top_pathways <- list()
pathway_cell_type <- c()
for (i in seq_along(cell_types)) {
cell_type <- cell_types[i]
dominant_group <- dominant_groups[i]
cat("\nProcessing cell type:", cell_type, "\n")
# Check if data exists for the cell type
if (!(cell_type %in% names(gsva_res_sub)) | !(cell_type %in% names(deseq_data))) {
warning(paste("Data missing for cell type:", cell_type))
next # Skip to the next cell type
}
gsva_res_current <- gsva_res_sub[[cell_type]]
deseq_current <- deseq_data[[cell_type]]
# Get sample IDs for the dominant group
group_samples <- deseq_current$sample_id[deseq_current$group.id == dominant_group]
group_samples <- intersect(group_samples, common_samples)
# Check if group_samples is empty
if (length(group_samples) == 0) {
warning(paste("No samples found for dominant group", dominant_group, "in cell type", cell_type))
next # Skip to the next cell type
}
# Compute mean GSVA scores for each pathway
mean_gsva <- rowMeans(gsva_res_current[, group_samples, drop = FALSE], na.rm = TRUE)
# Handle cases where mean_gsva is NA
mean_gsva <- mean_gsva[!is.na(mean_gsva)]
if (length(mean_gsva) == 0) {
warning(paste("No valid GSVA scores for cell type", cell_type))
next
}
# Select top pathways (e.g., top 100 or all if fewer)
num_pathways <- min(50, length(mean_gsva)) # Adjust the number as needed
top_pathways <- names(sort(mean_gsva, decreasing = TRUE))[1:num_pathways]
# Check if pathways exist in GSVA results
missing_pathways <- setdiff(top_pathways, rownames(gsva_res_current))
if (length(missing_pathways) > 0) {
warning(paste("Some top pathways are missing for cell type", cell_type, ":", paste(missing_pathways, collapse = ", ")))
# Remove missing pathways from top_pathways
top_pathways <- intersect(top_pathways, rownames(gsva_res_current))
}
# If no pathways remain, skip
if (length(top_pathways) == 0) {
warning(paste("No valid pathways for cell type", cell_type))
next
}
# Subset GSVA results to top pathways
gsva_top_subset <- gsva_res_current[top_pathways, ]
# Store in list
gsva_top_pathways[[cell_type]] <- gsva_top_subset
# Record pathway_cell_type
pathway_cell_type <- c(pathway_cell_type, rep(cell_type, length(top_pathways)))
}
# Step 4: Combine Pathways into One Matrix
# Combine all top pathways into one matrix
gsva_res_combined <- do.call(rbind, gsva_top_pathways)
# Create a data frame for pathway annotations
pathways_df <- data.frame(
pathway = rownames(gsva_res_combined),
cell_type = pathway_cell_type,
stringsAsFactors = FALSE
)
# Define the order of cell types
cell_type_order <- c("Mono", "Macrophage", "OC", "Treg", "Tex")
# Make cell_type a factor with specified order
pathways_df$cell_type <- factor(pathways_df$cell_type, levels = cell_type_order)
# Arrange pathways by cell type
pathways_df <- pathways_df[order(pathways_df$cell_type), ]
# Reorder gsva_res_combined accordingly
gsva_res_combined <- gsva_res_combined[pathways_df$pathway, ]
# Update pathway_cell_type
pathway_cell_type <- pathways_df$cell_type
# Step 5: Create Sample Annotations
# Create sample annotation data frame using group IDs from a reference cell type (e.g., "Macrophage")
# Assuming group.id is consistent across cell types; adjust if necessary
reference_cell_type <- "Macrophage"
if (!(reference_cell_type %in% names(deseq_data))) {
stop(paste("DESeq data for", reference_cell_type, "is missing."))
}
sample_annotation <- data.frame(
group.id = deseq_data[[reference_cell_type]]$group.id[match(common_samples, deseq_data[[reference_cell_type]]$sample_id)],
stringsAsFactors = FALSE
)
rownames(sample_annotation) <- common_samples
# Replace any unexpected group IDs with "Unknown" (optional)
# Modify this section based on your actual group IDs and desired handling
sample_annotation$group.id <- ifelse(sample_annotation$group.id %in% c("Mono", "MφOC", "TregTex"),
sample_annotation$group.id, "Unknown")
# Define colors for group.id
group_levels <- unique(sample_annotation$group.id)
group_colors <- setNames(
brewer.pal(min(length(group_levels), 8), "Set1"), # Use Set1 palette with up to 8 colors
group_levels
)
# Create HeatmapAnnotation
ha <- HeatmapAnnotation(
df = sample_annotation,
col = list(
group.id = group_colors
),
annotation_legend_param = list(
group.id = list(title = "Group ID")
)
)
# Step 6: Plot the Heatmap
# Define color function based on data range and handle NA values
color_fun <- colorRamp2(c(min(gsva_res_combined, na.rm = TRUE),
0,
max(gsva_res_combined, na.rm = TRUE)),
c("blue", "white", "red"))
# Create the heatmap with adjusted figure size
heatmap <- Heatmap(
gsva_res_combined,
name = "GSVA Score",
top_annotation = ha,
show_column_names = FALSE,
cluster_columns = TRUE,
cluster_rows = FALSE, # Rows are ordered manually by cell type
column_title = "Samples",
row_title = "Pathways",
col = color_fun,
row_names_gp = gpar(fontsize = 8),
row_split = pathway_cell_type, # Split rows by cell type
row_order = order(pathway_cell_type, decreasing = FALSE),
row_title_rot = 0,
row_title_gp = gpar(fontsize = 3, fontface = "bold"),
na_col = "gray" # Color for NA values (if any)
)
# Save the heatmap to a PDF with specified size
#pdf("./outs/GSVA_by_celltype_summarized/gsva_res_combined_imm_bone_top_50_for_10.pdf", width = 10, height = 8) # Width and height in inches
# Draw the heatmap with legends on the right
draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right")
# Close the graphics device
#dev.off()
# Alternatively, to display in the R plotting window with adjusted size:
# Adjust the plotting window size manually or programmatically
# windows(width = 14, height = 30) # For Windows OS
# quartz(width = 14, height = 30) # For macOS
# x11(width = 14, height = 30) # For Linux or cross-platform
# Draw the heatmap
#draw(heatmap, heatmap_legend_side = "right", annotation_legend_side = "right")
write.csv(gsva_res_combined, './outs/GSVA_by_celltype_summarized/gsva_res_combined_imm_bone_top_50_for_10.csv')
```