---
title: "GSE124647"
output: html_document
date: "2024-09-17"
---
```{r}
# Load necessary libraries
library(GSVA)
library(GSEABase)
library(pheatmap)
rm(list=ls())
```
```{r}
# Load necessary libraries
library(GSVA)
library(GSEABase)
library(pheatmap)
library(RColorBrewer)
library(viridis)
load("/Volumes/Fengshuo_14T/Lab/Yunfeng/cancer_comparsions/Zenodo/data/Bulk_Microarray_Data(published)/PMID-31231679.RData")
marker <- readRDS("./data/Bulk_Microarray_Data(published)/cell_type_markers_from_scRNA_data_for_estimation.rds")
# Assuming 'gse124627.clps' is your expression data matrix with genes as rows and samples as columns
# Assuming 'marker' is your data frame with cell type markers
# Assuming 'gse124627.ann' is your new sample annotation data frame
expression_matrix <- gse124627.clps
# Prepare gene names
rownames(expression_matrix) <- toupper(rownames(expression_matrix))
# Split the marker data into cell types and only keep the top 10 genes per cell type
cell_type_markers <- split(marker$gene, marker$cluster)
cell_type_markers <- lapply(cell_type_markers, function(genes) {
toupper(genes[1:min(10, length(genes))]) # Keep top 10 genes
})
# Run ssGSEA to calculate enrichment scores using all cell types
ssgsea_scores_all <- gsva(expression_matrix, cell_type_markers, method='ssgsea', ssgsea.norm=TRUE)
# Specify cell types to keep
cell_types_to_keep <- c("pro Mono", "CD14hi Mono", "CD16hi Mono", "OC", "Mφ", "CD4 Treg", "CD8 Tex")
# Trim whitespace from row names and filter the ssGSEA scores
rownames(ssgsea_scores_all) <- trimws(rownames(ssgsea_scores_all))
cell_types_to_keep <- trimws(cell_types_to_keep)
ssgsea_scores_filtered <- ssgsea_scores_all[cell_types_to_keep, , drop=FALSE]
# Reorder the samples based on new sample annotation 'gse124627.ann'
common_samples <- intersect(colnames(ssgsea_scores_filtered), rownames(gse124627.ann))
gse124627.ann <- gse124627.ann[common_samples, ]
ssgsea_scores_filtered <- ssgsea_scores_filtered[, common_samples]
# Scale the data after filtering
ssgsea_scores_scaled <- t(scale(t(ssgsea_scores_filtered)))
# Create ranges for PFS and oas.months and convert them to factors
gse124627.ann$PFS <- cut(as.numeric(gse124627.ann$PFS), breaks=c(0, 5, 10, 15, 20, 25, 30, 100), include.lowest=TRUE)
gse124627.ann$oas.months <- cut(as.numeric(gse124627.ann$oas.months), breaks=c(0, 5, 10, 15, 20, 40, 60, 80, 100, 120, 140), include.lowest=TRUE)
# Make sure PFS and oas.months are factors
gse124627.ann$PFS <- as.factor(gse124627.ann$PFS)
gse124627.ann$oas.months <- as.factor(gse124627.ann$oas.months)
# Generate heatmap
# Adjust the scale range to -1.5 to 1.5
breaks <- seq(-1.5, 1.5, length.out = 100)
# Create annotations for the samples (Met.Site and other annotations)
sample_annotations <- data.frame(Met.Site = gse124627.ann$site,
PFS = gse124627.ann$PFS,
oas.months = gse124627.ann$oas.months)
rownames(sample_annotations) <- rownames(gse124627.ann)
# Generate color palette for Met.Site based on the number of unique values
unique_met_sites <- unique(gse124627.ann$site)
num_met_sites <- length(unique_met_sites)
# Check if num_met_sites is greater than 10, if so, generate more colors
met_site_colors <- if (num_met_sites > 10) {
colorRampPalette(brewer.pal(9, "Paired"))(num_met_sites) # Generate more colors if needed
} else {
brewer.pal(10, "Paired")
}
# Assign colors for each unique Met.Site
names(met_site_colors) <- unique_met_sites
# Generate colors for PFS and oas.months with reversed color gradients
pfs_colors <- viridis_pal(option = "inferno", direction = -1)(length(levels(gse124627.ann$PFS))) # Reverse inferno
oas_colors <- viridis_pal(option = "viridis", direction = -1)(length(levels(gse124627.ann$oas.months))) # Reverse viridis
# Specify color palettes for annotations
annotation_colors <- list(
Met.Site = met_site_colors, # Custom palette for Met.Site
PFS = setNames(pfs_colors, levels(gse124627.ann$PFS)), # Ensure colors match PFS factor levels with reversed inferno
oas.months = setNames(oas_colors, levels(gse124627.ann$oas.months)) # Ensure colors match oas.months factor levels with reversed viridis
)
# Generate the heatmap with annotations and custom color palettes
pheatmap(ssgsea_scores_scaled,
breaks = breaks,
clustering_distance_rows = 'correlation',
clustering_distance_cols = 'correlation',
clustering_method = 'complete',
show_colnames = TRUE,
show_rownames = TRUE,
cluster_rows = F,
cluster_cols = TRUE,
fontsize_row = 10,
fontsize_col = 10,
annotation_col = sample_annotations,
annotation_colors = annotation_colors)
```
```{r}
# Load necessary libraries
library(GSVA)
library(GSEABase)
library(pheatmap)
library(RColorBrewer)
library(viridis)
load("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/Breast - GSE124647.RData")
# Assuming 'gse124627.clps' is your expression data matrix with genes as rows and samples as columns
# Assuming 'marker' is your data frame with cell type markers
# Assuming 'gse124627.ann' is your new sample annotation data frame
expression_matrix <- gse124627.clps
# Prepare gene names
rownames(expression_matrix) <- toupper(rownames(expression_matrix))
# Split the marker data into cell types and only keep the top 10 genes per cell type
cell_type_markers <- split(marker$gene, marker$cluster)
cell_type_markers <- lapply(cell_type_markers, function(genes) {
toupper(genes[1:min(20, length(genes))]) # Keep top 10 genes
})
# Run ssGSEA to calculate enrichment scores using all cell types
ssgsea_scores_all <- gsva(expression_matrix, cell_type_markers, method='ssgsea', ssgsea.norm=TRUE)
# Specify cell types to keep
cell_types_to_keep <- c("pro Mono", "CD14hi Mono", "CD16hi Mono", "OC", "Mφ", "CD4 Treg", "CD8 Tex")
# Trim whitespace from row names and filter the ssGSEA scores by cell types
rownames(ssgsea_scores_all) <- trimws(rownames(ssgsea_scores_all))
cell_types_to_keep <- trimws(cell_types_to_keep)
ssgsea_scores_filtered <- ssgsea_scores_all[cell_types_to_keep, , drop=FALSE]
# Scale the data after filtering by both cell types and Met.Site
ssgsea_scores_scaled <- t(scale(t(ssgsea_scores_filtered)))
# Create ranges for PFS and oas.months and convert them to factors
gse124627.ann$PFS <- cut(as.numeric(gse124627.ann$PFS), breaks=c(0, 5, 10, 15, 20, 25, 30, 100), include.lowest=TRUE)
gse124627.ann$oas.months <- cut(as.numeric(gse124627.ann$oas.months), breaks=c(0, 5, 10, 15, 20, 40, 60, 80, 100, 120, 140), include.lowest=TRUE)
# Make sure PFS and oas.months are factors
gse124627.ann$PFS <- as.factor(gse124627.ann$PFS)
gse124627.ann$oas.months <- as.factor(gse124627.ann$oas.months)
# Generate heatmap
# Adjust the scale range to -1.5 to 1.5
breaks <- seq(-1.5, 1.5, length.out = 100)
# Create annotations for the samples (Met.Site and other annotations)
sample_annotations <- data.frame(Met.Site = gse124627.ann$site,
PFS = gse124627.ann$PFS,
oas.months = gse124627.ann$oas.months)
rownames(sample_annotations) <- rownames(gse124627.ann)
# Generate color palette for Met.Site based on the number of unique values
unique_met_sites <- unique(gse124627.ann$site)
num_met_sites <- length(unique_met_sites)
# Check if num_met_sites is greater than 10, if so, generate more colors
met_site_colors <- if (num_met_sites > 10) {
colorRampPalette(brewer.pal(9, "Paired"))(num_met_sites) # Generate more colors if needed
} else {
brewer.pal(10, "Paired")
}
# Assign colors for each unique Met.Site
names(met_site_colors) <- unique_met_sites
# Generate colors for PFS and oas.months with reversed color gradients
pfs_colors <- viridis_pal(option = "inferno", direction = -1)(length(levels(gse124627.ann$PFS))) # Reverse inferno
oas_colors <- viridis_pal(option = "viridis", direction = -1)(length(levels(gse124627.ann$oas.months))) # Reverse viridis
# Specify color palettes for annotations
annotation_colors <- list(
Met.Site = met_site_colors, # Custom palette for Met.Site
PFS = setNames(pfs_colors, levels(gse124627.ann$PFS)), # Ensure colors match PFS factor levels with reversed inferno
oas.months = setNames(oas_colors, levels(gse124627.ann$oas.months)) # Ensure colors match oas.months factor levels with reversed viridis
)
# Generate the heatmap with annotations and custom color palettes
pheatmap(ssgsea_scores_scaled,
breaks = breaks,
clustering_distance_rows = 'correlation',
clustering_distance_cols = 'correlation',
clustering_method = 'average',
show_colnames = TRUE,
show_rownames = TRUE,
cluster_rows = F,
cluster_cols = TRUE,
fontsize_row = 10,
fontsize_col = 10,
annotation_col = sample_annotations,
annotation_colors = annotation_colors)
```
```{r}
# Load necessary libraries
library(GSVA)
library(GSEABase)
library(pheatmap)
library(RColorBrewer)
library(viridis)
# Load necessary libraries
load("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/Breast - GSE124647.RData")
# Assuming 'gse124627.clps' is your expression data matrix with genes as rows and samples as columns
# Assuming 'marker' is your data frame with cell type markers
# Assuming 'gse124627.ann' is your new sample annotation data frame
expression_matrix <- gse124627.clps
# Prepare gene names
rownames(expression_matrix) <- toupper(rownames(expression_matrix))
# Split the marker data into cell types and only keep the top 10 genes per cell type
cell_type_markers <- split(marker$gene, marker$cluster)
cell_type_markers <- lapply(cell_type_markers, function(genes) {
toupper(genes[1:min(10, length(genes))]) # Keep top 10 genes
})
# Run ssGSEA to calculate enrichment scores using all cell types
ssgsea_scores_all <- gsva(expression_matrix, cell_type_markers, method='ssgsea', ssgsea.norm=TRUE)
# Reorder the samples based on new sample annotation 'gse124627.ann'
common_samples <- intersect(colnames(ssgsea_scores_all), rownames(gse124627.ann))
gse124627.ann <- gse124627.ann[common_samples, ]
ssgsea_scores_all <- ssgsea_scores_all[, common_samples]
# Scale the data
ssgsea_scores_scaled <- t(scale(t(ssgsea_scores_all)))
# Create ranges for PFS and oas.months and convert them to factors
gse124627.ann$PFS <- cut(as.numeric(gse124627.ann$PFS), breaks=c(0, 5, 10, 15, 20, 25, 30, 100), include.lowest=TRUE)
gse124627.ann$oas.months <- cut(as.numeric(gse124627.ann$oas.months), breaks=c(0, 5, 10, 15, 20, 40, 60, 80, 100, 120, 140), include.lowest=TRUE)
# Make sure PFS and oas.months are factors
gse124627.ann$PFS <- as.factor(gse124627.ann$PFS)
gse124627.ann$oas.months <- as.factor(gse124627.ann$oas.months)
# Generate heatmap
# Adjust the scale range to -1.5 to 1.5
breaks <- seq(-1.5, 1.5, length.out = 100)
# Create annotations for the samples (Met.Site and other annotations)
sample_annotations <- data.frame(Met.Site = gse124627.ann$site,
PFS = gse124627.ann$PFS,
oas.months = gse124627.ann$oas.months)
rownames(sample_annotations) <- rownames(gse124627.ann)
# Generate color palette for Met.Site based on the number of unique values
unique_met_sites <- unique(gse124627.ann$site)
num_met_sites <- length(unique_met_sites)
# Check if num_met_sites is greater than 10, if so, generate more colors
met_site_colors <- if (num_met_sites > 10) {
colorRampPalette(brewer.pal(9, "Paired"))(num_met_sites) # Generate more colors if needed
} else {
brewer.pal(10, "Paired")
}
# Assign colors for each unique Met.Site
names(met_site_colors) <- unique_met_sites
# Generate colors for PFS and oas.months
pfs_colors <- viridis_pal(option = "inferno",direction = -1)(length(levels(gse124627.ann$PFS)))
oas_colors <- viridis_pal(option = "viridis",direction = -1)(length(levels(gse124627.ann$oas.months)))
# Specify color palettes for annotations
annotation_colors <- list(
Met.Site = met_site_colors, # Custom palette for Met.Site
PFS = setNames(pfs_colors, levels(gse124627.ann$PFS)), # Ensure colors match PFS factor levels
oas.months = setNames(oas_colors, levels(gse124627.ann$oas.months)) # Ensure colors match oas.months factor levels
)
# Generate the heatmap with annotations and custom color palettes
pheatmap(ssgsea_scores_scaled,
breaks = breaks,
clustering_distance_rows = 'correlation',
clustering_distance_cols = 'correlation',
clustering_method = 'complete',
show_colnames = TRUE,
show_rownames = TRUE,
cluster_rows = TRUE,
cluster_cols = TRUE,
fontsize_row = 10,
fontsize_col = 10,
annotation_col = sample_annotations,
annotation_colors = annotation_colors)
```
```{r}
# Load necessary libraries
library(GSVA)
library(GSEABase)
library(pheatmap)
library(RColorBrewer)
library(viridis)
load("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/Breast - GSE124647.RData")
# Assuming 'gse124627.clps' is your expression data matrix with genes as rows and samples as columns
# Assuming 'marker' is your data frame with cell type markers
# Assuming 'gse124627.ann' is your new sample annotation data frame
expression_matrix <- gse124627.clps
# Prepare gene names
rownames(expression_matrix) <- toupper(rownames(expression_matrix))
# Split the marker data into cell types and only keep the top 10 genes per cell type
cell_type_markers <- split(marker$gene, marker$cluster)
cell_type_markers <- lapply(cell_type_markers, function(genes) {
toupper(genes[1:min(10, length(genes))]) # Keep top 10 genes
})
# Run ssGSEA to calculate enrichment scores using all cell types
ssgsea_scores_all <- gsva(expression_matrix, cell_type_markers, method='ssgsea', ssgsea.norm=TRUE)
# Specify cell types to keep
cell_types_to_keep <- c("pro Mono", "CD14hi Mono", "CD16hi Mono", "OC", "Mφ", "CD4 Treg", "CD8 Tex")
# Trim whitespace from row names and filter the ssGSEA scores by cell types
rownames(ssgsea_scores_all) <- trimws(rownames(ssgsea_scores_all))
cell_types_to_keep <- trimws(cell_types_to_keep)
ssgsea_scores_filtered <- ssgsea_scores_all[cell_types_to_keep, , drop=FALSE]
# Now filter by Met.Site for only samples where Met.Site is "Bone"
samples_to_keep <- rownames(gse124627.ann)[gse124627.ann$site == "Bone"]
# Reorder and filter the samples based on the filtered Met.Site and common samples
common_samples <- intersect(colnames(ssgsea_scores_filtered), samples_to_keep)
gse124627.ann <- gse124627.ann[common_samples, ]
ssgsea_scores_filtered <- ssgsea_scores_filtered[, common_samples]
# Scale the data after filtering by both cell types and Met.Site
ssgsea_scores_scaled <- t(scale(t(ssgsea_scores_filtered)))
# Create ranges for PFS and oas.months and convert them to factors
gse124627.ann$PFS <- cut(as.numeric(gse124627.ann$PFS), breaks=c(0, 5, 10, 15, 20, 25, 30, 100), include.lowest=TRUE)
gse124627.ann$oas.months <- cut(as.numeric(gse124627.ann$oas.months), breaks=c(0, 5, 10, 15, 20, 40, 60, 80, 100, 120, 140), include.lowest=TRUE)
# Make sure PFS and oas.months are factors
gse124627.ann$PFS <- as.factor(gse124627.ann$PFS)
gse124627.ann$oas.months <- as.factor(gse124627.ann$oas.months)
# Generate heatmap
# Adjust the scale range to -1.5 to 1.5
breaks <- seq(-1.5, 1.5, length.out = 100)
# Create annotations for the samples (Met.Site and other annotations)
sample_annotations <- data.frame(Met.Site = gse124627.ann$site,
PFS = gse124627.ann$PFS,
oas.months = gse124627.ann$oas.months)
rownames(sample_annotations) <- rownames(gse124627.ann)
# Generate color palette for Met.Site based on the number of unique values
unique_met_sites <- unique(gse124627.ann$site)
num_met_sites <- length(unique_met_sites)
# Check if num_met_sites is greater than 10, if so, generate more colors
met_site_colors <- if (num_met_sites > 10) {
colorRampPalette(brewer.pal(9, "Paired"))(num_met_sites) # Generate more colors if needed
} else {
brewer.pal(10, "Paired")
}
# Assign colors for each unique Met.Site
names(met_site_colors) <- unique_met_sites
# Generate colors for PFS and oas.months with reversed color gradients
pfs_colors <- viridis_pal(option = "inferno", direction = -1)(length(levels(gse124627.ann$PFS))) # Reverse inferno
oas_colors <- viridis_pal(option = "viridis", direction = -1)(length(levels(gse124627.ann$oas.months))) # Reverse viridis
# Specify color palettes for annotations
annotation_colors <- list(
Met.Site = met_site_colors, # Custom palette for Met.Site
PFS = setNames(pfs_colors, levels(gse124627.ann$PFS)), # Ensure colors match PFS factor levels with reversed inferno
oas.months = setNames(oas_colors, levels(gse124627.ann$oas.months)) # Ensure colors match oas.months factor levels with reversed viridis
)
# Generate the heatmap with annotations and custom color palettes
pheatmap(ssgsea_scores_scaled,
breaks = breaks,
clustering_distance_rows = 'correlation',
clustering_distance_cols = 'correlation',
clustering_method = 'average',
show_colnames = TRUE,
show_rownames = TRUE,
cluster_rows = F,
cluster_cols = TRUE,
fontsize_row = 10,
fontsize_col = 10,
annotation_col = sample_annotations,
annotation_colors = annotation_colors)
```
```{r}
# Load necessary libraries
library(GSVA)
library(GSEABase)
library(pheatmap)
library(RColorBrewer)
library(viridis)
# Assuming 'gse124627.clps' is your expression data matrix with genes as rows and samples as columns
# Assuming 'marker' is your data frame with cell type markers
load("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/Breast - GSE124647.RData")
# Assuming 'gse124627.ann' is your new sample annotation data frame
expression_matrix <- gse124627.clps
# Prepare gene names
rownames(expression_matrix) <- toupper(rownames(expression_matrix))
# Split the marker data into cell types and only keep the top 10 genes per cell type
cell_type_markers <- split(marker$gene, marker$cluster)
cell_type_markers <- lapply(cell_type_markers, function(genes) {
toupper(genes[1:min(10, length(genes))]) # Keep top 10 genes
})
# Run ssGSEA to calculate enrichment scores using all cell types
ssgsea_scores_all <- gsva(expression_matrix, cell_type_markers, method='ssgsea', ssgsea.norm=TRUE)
# Filter the samples to include only those where Met.Site is "Bone"
bone_samples <- rownames(gse124627.ann)[gse124627.ann$site == "Bone"]
common_samples <- intersect(colnames(ssgsea_scores_all), bone_samples)
gse124627.ann <- gse124627.ann[common_samples, ]
ssgsea_scores_filtered <- ssgsea_scores_all[, common_samples]
# Scale the data after filtering
ssgsea_scores_scaled <- t(scale(t(ssgsea_scores_filtered)))
# Create ranges for PFS and oas.months and convert them to factors
gse124627.ann$PFS <- cut(as.numeric(gse124627.ann$PFS), breaks=c(0, 5, 10, 15, 20, 25, 30, 100), include.lowest=TRUE)
gse124627.ann$oas.months <- cut(as.numeric(gse124627.ann$oas.months), breaks=c(0, 5, 10, 15, 20, 40, 60, 80, 100, 120, 140), include.lowest=TRUE)
# Make sure PFS and oas.months are factors
gse124627.ann$PFS <- as.factor(gse124627.ann$PFS)
gse124627.ann$oas.months <- as.factor(gse124627.ann$oas.months)
# Generate heatmap
# Adjust the scale range to -1.5 to 1.5
breaks <- seq(-0.5, 1.5, length.out = 100)
# Create annotations for the samples (Met.Site and other annotations)
sample_annotations <- data.frame(Met.Site = gse124627.ann$site,
PFS = gse124627.ann$PFS,
oas.months = gse124627.ann$oas.months)
rownames(sample_annotations) <- rownames(gse124627.ann)
# Generate color palette for Met.Site based on the number of unique values
unique_met_sites <- unique(gse124627.ann$site)
num_met_sites <- length(unique_met_sites)
# Check if num_met_sites is greater than 10, if so, generate more colors
met_site_colors <- if (num_met_sites > 10) {
colorRampPalette(brewer.pal(9, "Paired"))(num_met_sites) # Generate more colors if needed
} else {
brewer.pal(10, "Paired")
}
# Assign colors for each unique Met.Site
names(met_site_colors) <- unique_met_sites
# Generate colors for PFS and oas.months with reversed color gradients
pfs_colors <- viridis_pal(option = "inferno", direction = -1)(length(levels(gse124627.ann$PFS))) # Reverse inferno
oas_colors <- viridis_pal(option = "viridis", direction = -1)(length(levels(gse124627.ann$oas.months))) # Reverse viridis
# Specify color palettes for annotations
annotation_colors <- list(
Met.Site = met_site_colors, # Custom palette for Met.Site
PFS = setNames(pfs_colors, levels(gse124627.ann$PFS)), # Ensure colors match PFS factor levels with reversed inferno
oas.months = setNames(oas_colors, levels(gse124627.ann$oas.months)) # Ensure colors match oas.months factor levels with reversed viridis
)
pdf("../../../Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/outs/GSE_124647_selected_heatmap.pdf", width = 6, height = 8) # Adjust width and height as needed
# Generate the heatmap with annotations and custom color palettes
pheatmap(ssgsea_scores_scaled,
breaks = breaks,
#scale = "column",
clustering_distance_rows = 'correlation',
clustering_distance_cols = 'correlation',
clustering_method = 'complete',
show_colnames = TRUE,
show_rownames = TRUE,
cluster_rows = TRUE,
cluster_cols = TRUE,
fontsize_row = 10,
fontsize_col = 10,
annotation_col = sample_annotations,
annotation_colors = annotation_colors)
dev.off()
```
```{r}
# Load necessary libraries
library(GSVA)
library(GSEABase)
library(pheatmap)
library(RColorBrewer)
library(viridis)
# Assuming 'gse124627.clps' is your expression data matrix with genes as rows and samples as columns
# Assuming 'marker' is your data frame with cell type markers
load("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/Breast - GSE124647.RData")
# Assuming 'gse124627.ann' is your new sample annotation data frame
expression_matrix <- gse124627.clps
# Load necessary libraries
library(survival)
library(survminer)
# Filter the annotation and expression matrix to include only samples where Met.Site is "Bone"
bone_samples <- rownames(gse124627.ann)[gse124627.ann$site == "Bone"]
gse124627.ann_bone <- gse124627.ann[bone_samples, ]
expression_matrix_bone <- expression_matrix[, bone_samples]
# Extract the markers for both "CD4 Treg" and "CD8 Tex"
cd4_treg_markers <- cell_type_markers[["CD4 Treg"]]
cd8_tex_markers <- cell_type_markers[["CD8 Tex"]]
# Combine the markers for CD4 Treg and CD8 Tex
combined_markers <- union(cd4_treg_markers, cd8_tex_markers)
# Filter the expression matrix to include only these genes
expression_matrix_filtered <- expression_matrix_bone[rownames(expression_matrix_bone) %in% combined_markers, ]
# Create an average expression score for each sample based on the combined markers
expression_score <- colMeans(expression_matrix_filtered)
# Add the expression score to the annotation data
gse124627.ann_bone$expression_score <- expression_score
# Create a binary classification based on median expression
gse124627.ann_bone$high_expression <- ifelse(gse124627.ann_bone$expression_score > median(gse124627.ann_bone$expression_score), "High", "Low")
# 1. Draw the survival curve for PFS
surv_object_pfs <- Surv(time = gse124627.ann_bone$PFS, event = gse124627.ann_bone$PFS.event)
fit_pfs <- survfit(surv_object_pfs ~ high_expression, data = gse124627.ann_bone)
# Plot the PFS survival curve
ggsurvplot(fit_pfs, data = gse124627.ann_bone, pval = TRUE,
title = "PFS Survival Curve based on CD4 Treg and CD8 Tex Markers",
xlab = "Time (Months)", ylab = "Survival Probability")
# 2. Draw the survival curve for oas.months
surv_object_oas <- Surv(time = gse124627.ann_bone$oas.months, event = gse124627.ann_bone$os.event)
fit_oas <- survfit(surv_object_oas ~ high_expression, data = gse124627.ann_bone)
# Plot the OAS survival curve
ggsurvplot(fit_oas, data = gse124627.ann_bone, pval = TRUE,
title = "Overall Survival Curve (OAS) based on CD4 Treg and CD8 Tex Markers",
xlab = "Time (Months)", ylab = "Survival Probability")
```
```{r}
# Load necessary libraries
library(GSVA)
library(GSEABase)
library(pheatmap)
library(RColorBrewer)
library(viridis)
rm(list = ls())
# Assuming 'gse124627.clps' is your expression data matrix with genes as rows and samples as columns
# Assuming 'marker' is your data frame with cell type markers
load("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/Breast - GSE124647.RData")
marker <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/markers_from_scRNA.rds")
# Assuming 'gse124627.ann' is your new sample annotation data frame
expression_matrix <- gse124627.clps
# Split the marker data into cell types and only keep the top 10 genes per cell type
cell_type_markers <- split(marker$gene, marker$cluster)
cell_type_markers <- lapply(cell_type_markers, function(genes) {
toupper(genes[1:min(10, length(genes))]) # Keep top 10 genes
})
# Load necessary libraries
library(survival)
library(survminer)
library(gridExtra)
# Define the PDF file where plots will be saved
pdf("./outs/GSE124647_PFS_Survival_Curves.pdf", width = 10, height = 15)
# Get unique Met.Sites
met_sites <- unique(gse124627.ann$site)
# Extract the markers for both "CD4 Treg" and "CD8 Tex"
cd4_treg_markers <- cell_type_markers[["CD4 Treg"]]
cd8_tex_markers <- cell_type_markers[["CD8 Tex"]]
# Combine the markers for CD4 Treg and CD8 Tex
combined_markers <- union(cd4_treg_markers, cd8_tex_markers)
# Filter the expression matrix to include only these genes
expression_matrix_filtered <- expression_matrix[rownames(expression_matrix) %in% combined_markers, ]
# Initialize an empty list to store plots
plot_list <- list()
# Loop through each Met.Site and generate a survival plot for PFS
for (site in met_sites) {
# Filter samples based on Met.Site
site_samples <- rownames(gse124627.ann)[gse124627.ann$site == site]
gse124627.ann_site <- gse124627.ann[site_samples, ]
# Check if there are enough samples for survival analysis
if (length(site_samples) < 2) {
message(paste("Not enough samples for", site, "skipping..."))
next
}
expression_matrix_site <- expression_matrix_filtered[, site_samples, drop=FALSE]
# Check if the filtered matrix has more than one column
if (ncol(expression_matrix_site) > 1) {
# Create an average expression score for each sample based on the combined markers
expression_score <- colMeans(expression_matrix_site)
} else {
# If only one sample, just use the values directly
expression_score <- expression_matrix_site
}
# Add the expression score to the annotation data
gse124627.ann_site$expression_score <- expression_score
# Create a binary classification based on median expression
gse124627.ann_site$high_expression <- ifelse(gse124627.ann_site$expression_score > median(gse124627.ann_site$expression_score), "High", "Low")
# Draw the survival curve for PFS for the current Met.Site
surv_object_pfs <- Surv(time = gse124627.ann_site$PFS, event = gse124627.ann_site$PFS.event)
fit_pfs <- survfit(surv_object_pfs ~ high_expression, data = gse124627.ann_site)
# Generate the PFS survival plot
p <- ggsurvplot(fit_pfs, data = gse124627.ann_site, pval = TRUE,
title = paste("Survival Curve for BC ", site, "metastasis\n based on CD4 Treg and CD8 Tex Signatures"),
xlab = "Time (Months)", ylab = "PFS Probability",
pval.size = 4, # Set the size of the p-value text
font.title = c(12, "bold"), # Set the font size and style for the title
font.x = c(8), # Set the font size for the x-axis label
font.y = c(8), # Set the font size for the y-axis label
font.tickslab = c(12)) # Set the font size for the axis tick labels
# Add the plot to the list
plot_list[[site]] <- p$plot
}
# Arrange the plots in a grid layout and save as a one-page PDF
if (length(plot_list) > 0) {
grid.arrange(grobs = plot_list, ncol = 2) # Adjust ncol to change layout
}
# Close the PDF device
dev.off()
```
```{r}
# Load necessary libraries
library(GSVA)
library(GSEABase)
library(pheatmap)
library(RColorBrewer)
library(viridis)
library(survival)
library(survminer)
library(gridExtra)
library(ggplot2)
# Clear the workspace
rm(list = ls())
# Load expression data and marker information
load("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/Breast - GSE124647.RData")
marker <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/markers_from_scRNA.rds")
# Assign expression matrix
expression_matrix <- gse124627.clps
# Split the marker data into cell types and keep the top 10 genes per cell type
cell_type_markers <- split(marker$gene, marker$cluster)
cell_type_markers <- lapply(cell_type_markers, function(genes) {
toupper(genes[1:min(10, length(genes))]) # Keep top 10 genes
})
# Define the PDF file where plots will be saved
pdf("./outs/GSE124647_PFS_Survival_Curves.pdf", width = 10, height = 15)
# Get unique metastatic sites
met_sites <- unique(gse124627.ann$site)
# Extract the markers for both "CD4 Treg" and "CD8 Tex"
cd4_treg_markers <- cell_type_markers[["CD4 Treg"]]
cd8_tex_markers <- cell_type_markers[["CD8 Tex"]]
# Combine the markers for CD4 Treg and CD8 Tex
combined_markers <- union(cd4_treg_markers, cd8_tex_markers)
# Filter the expression matrix to include only these genes
expression_matrix_filtered <- expression_matrix[rownames(expression_matrix) %in% combined_markers, ]
# Initialize an empty list to store combined plots and tables
plot_list <- list()
# Loop through each Met.Site and generate a survival plot for PFS with contingency table
for (site in met_sites) {
# Filter samples based on Met.Site
site_samples <- rownames(gse124627.ann)[gse124627.ann$site == site]
gse124627.ann_site <- gse124627.ann[site_samples, ]
# Check if there are enough samples for survival analysis
if (length(site_samples) < 2) {
message(paste("Not enough samples for", site, "skipping..."))
next
}
# Filter the expression matrix for the current site
expression_matrix_site <- expression_matrix_filtered[, site_samples, drop=FALSE]
# Check if the filtered matrix has more than one column
if (ncol(expression_matrix_site) > 1) {
# Create an average expression score for each sample based on the combined markers
expression_score <- colMeans(expression_matrix_site)
} else {
# If only one sample, use the values directly
expression_score <- expression_matrix_site
}
# Add the expression score to the annotation data
gse124627.ann_site$expression_score <- expression_score
# Create a binary classification based on median expression
median_score <- median(gse124627.ann_site$expression_score, na.rm = TRUE)
gse124627.ann_site$high_expression <- ifelse(gse124627.ann_site$expression_score > median_score, "High", "Low")
# Handle NA values in high_expression
gse124627.ann_site$high_expression[is.na(gse124627.ann_site$high_expression)] <- "Low"
# Generate the contingency table
contingency_table <- table(
High_Expression = gse124627.ann_site$high_expression,
PFS_Event = gse124627.ann_site$PFS.event
)
# Print contingency table for debugging (optional)
# print(paste("Contingency Table for", site))
# print(contingency_table)
# Draw the survival curve for PFS for the current Met.Site
surv_object_pfs <- Surv(time = gse124627.ann_site$PFS, event = gse124627.ann_site$PFS.event)
fit_pfs <- survfit(surv_object_pfs ~ high_expression, data = gse124627.ann_site)
# Generate the PFS survival plot
p <- ggsurvplot(
fit_pfs,
data = gse124627.ann_site,
pval = TRUE,
title = NULL, # Remove the title
subtitle = paste("Metastatic Site:", site),
xlab = "Time (Months)",
ylab = "PFS Probability",
pval.size = 4, # Size of the p-value text
font.title = c(12, "bold"), # Font size and style for the title
font.subtitle = c(10, "italic"), # Font size and style for the subtitle
font.x = c(8), # Font size for the x-axis label
font.y = c(8), # Font size for the y-axis label
font.tickslab = c(10), # Font size for the axis tick labels
legend.title = "Expression Level",
legend.labs = c("High", "Low"),
legend = "right",
risk.table = FALSE,
ncensor.plot = FALSE,
ggtheme = theme_minimal(),
palette = c("#E7B800", "#2E9FDF"), # Custom colors for High and Low
size = 1,
alpha = 0.8,
break.time.by = 5,
surv.median.line = "hv", # Add horizontal and vertical lines at median survival
show = FALSE
)
# Create a ggplot object for the contingency table
# Convert the table to a data frame for ggplot
contingency_df <- as.data.frame.matrix(contingency_table)
contingency_df$High_Expression <- rownames(contingency_df)
# Melt the data frame for ggplot
library(reshape2)
contingency_melt <- melt(contingency_df, id.vars = "High_Expression", variable.name = "PFS_Event", value.name = "Count")
# Create the contingency table plot
table_plot <- ggplot(contingency_melt, aes(x = PFS_Event, y = High_Expression, fill = Count)) +
geom_tile(color = "white") +
geom_text(aes(label = Count), color = "black", size = 4) +
scale_fill_gradient(low = "white", high = "steelblue") +
labs(title = "Contingency Table",
x = "PFS Event",
y = "Expression Level") +
theme_minimal() +
theme(
plot.title = element_text(size = 10, face = "bold"),
axis.title = element_text(size = 8),
axis.text = element_text(size = 8),
legend.position = "none" # Hide the fill legend
)
# Combine the survival plot and contingency table vertically
combined_plot <- arrangeGrob(
p$plot,
table_plot,
ncol = 1,
heights = c(3, 1) # Adjust heights as needed
)
# Add the combined plot to the list
plot_list[[site]] <- combined_plot
}
# Arrange all combined plots in a grid layout and save as a single-page PDF
if (length(plot_list) > 0) {
# Determine the number of columns based on the number of plots
num_cols <- 2
num_rows <- ceiling(length(plot_list) / num_cols)
# Arrange the plots
grid_arranged <- marrangeGrob(
grobs = plot_list,
nrow = num_rows,
ncol = num_cols,
top = "PFS Survival Curves with Contingency Tables"
)
# Print the arranged plots to the PDF
print(grid_arranged)
}
# Close the PDF device
dev.off()
```
```{r}
# Load necessary libraries
library(GSVA)
library(GSEABase)
library(pheatmap)
library(RColorBrewer)
library(viridis)
library(survival)
library(survminer)
library(gridExtra)
library(ggplot2)
library(reshape2)
# Clear the workspace
rm(list = ls())
# Load expression data and marker information
load("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/Breast - GSE124647.RData")
marker <- readRDS("~/Library/CloudStorage/OneDrive-BaylorCollegeofMedicine/Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/data/markers_from_scRNA.rds")
# Assign expression matrix
expression_matrix <- gse124627.clps
# Split the marker data into cell types and keep the top 10 genes per cell type
cell_type_markers <- split(marker$gene, marker$cluster)
cell_type_markers <- lapply(cell_type_markers, function(genes) {
toupper(genes[1:min(10, length(genes))]) # Keep top 10 genes
})
# Define the PDF file where plots will be saved
#pdf("../../../Human_Bone_Mets_Comparsion_scRNA/Additional human metastasis datasets/Bulk_Microarray/outs/GSE124647_PFS_Survival_Curves_with_Contingency_Tables.pdf", width = 10, height = 15)
# Get unique metastatic sites
met_sites <- unique(gse124627.ann$site)
# Extract the markers for both "CD4 Treg" and "CD8 Tex"
cd4_treg_markers <- cell_type_markers[["CD4 Treg"]]
cd8_tex_markers <- cell_type_markers[["CD8 Tex"]]
# Combine the markers for CD4 Treg and CD8 Tex
combined_markers <- union(cd4_treg_markers, cd8_tex_markers)
# Filter the expression matrix to include only these genes
expression_matrix_filtered <- expression_matrix[rownames(expression_matrix) %in% combined_markers, ]
# Initialize an empty list to store combined plots and tables
plot_list <- list()
# Loop through each Met.Site and generate a survival plot for PFS with contingency table
for (site in met_sites) {
# Filter samples based on Met.Site
site_samples <- rownames(gse124627.ann)[gse124627.ann$site == site]
gse124627.ann_site <- gse124627.ann[site_samples, ]
# Check if there are enough samples for survival analysis
if (length(site_samples) < 2) {
message(paste("Not enough samples for", site, "skipping..."))
next
}
# Filter the expression matrix for the current site
expression_matrix_site <- expression_matrix_filtered[, site_samples, drop=FALSE]
# Check if the filtered matrix has more than one column
if (ncol(expression_matrix_site) > 1) {
# Create an average expression score for each sample based on the combined markers
expression_score <- colMeans(expression_matrix_site)
} else {
# If only one sample, use the values directly
expression_score <- expression_matrix_site
}
# Add the expression score to the annotation data
gse124627.ann_site$expression_score <- expression_score
# Create a binary classification based on median expression
median_score <- median(gse124627.ann_site$expression_score, na.rm = TRUE)
gse124627.ann_site$high_expression <- ifelse(gse124627.ann_site$expression_score > median_score, "High", "Low")
# Handle NA values in high_expression
gse124627.ann_site$high_expression[is.na(gse124627.ann_site$high_expression)] <- "Low"
# Calculate statistics for the contingency table
# Number of samples in each group
count_table <- table(gse124627.ann_site$high_expression)
# Probability that PFS > 12 months
prob_pfs_12 <- tapply(gse124627.ann_site$PFS > 12, gse124627.ann_site$high_expression, mean, na.rm = TRUE)
# Create a data frame for the contingency table
contingency_df <- data.frame(
Group = names(count_table),
Count = as.integer(count_table),
Total = as.integer(count_table), # Total is the same as count
`PFS > 12 Months Probability` = round(prob_pfs_12, 2)
)
# Draw the survival curve for PFS for the current Met.Site
surv_object_pfs <- Surv(time = gse124627.ann_site$PFS, event = gse124627.ann_site$PFS.event)
fit_pfs <- survfit(surv_object_pfs ~ high_expression, data = gse124627.ann_site)
# Generate the PFS survival plot
p <- ggsurvplot(
fit_pfs,
data = gse124627.ann_site,
pval = TRUE,
title = NULL, # Remove the title
subtitle = paste("Metastatic Site:", site),
xlab = "Time (Months)",
ylab = "PFS Probability",
pval.size = 4, # Size of the p-value text
font.title = c(12, "bold"), # Font size and style for the title
font.subtitle = c(10, "italic"), # Font size and style for the subtitle
font.x = c(8), # Font size for the x-axis label
font.y = c(8), # Font size for the y-axis label
font.tickslab = c(10), # Font size for the axis tick labels
legend.title = "Expression Level",
legend.labs = c("High", "Low"),
legend = "right",
risk.table = FALSE,
ncensor.plot = FALSE,
ggtheme = theme_minimal(),
palette = c("#E7B800", "#2E9FDF"), # Custom colors for High and Low
size = 1,
alpha = 0.8,
break.time.by = 5,
surv.median.line = "hv" # Add horizontal and vertical lines at median survival
)
# Create the contingency table plot
table_plot <- tableGrob(contingency_df, rows = NULL, theme = ttheme_minimal(base_size = 10))
# Combine the survival plot and contingency table vertically
combined_plot <- arrangeGrob(
p$plot,
table_plot,
ncol = 1,
heights = c(3, 1) # Adjust heights as needed
)
# Add the combined plot to the list
plot_list[[site]] <- combined_plot
}
# Arrange all combined plots in a grid layout and save as a single-page PDF
if (length(plot_list) > 0) {
# Determine the number of columns based on the number of plots
num_cols <- 2
num_rows <- ceiling(length(plot_list) / num_cols)
# Arrange the plots
grid_arranged <- marrangeGrob(
grobs = plot_list,
nrow = num_rows,
ncol = num_cols,
top = "PFS Survival Curves with Contingency Tables"
)
# Print the arranged plots to the PDF
print(grid_arranged)
}
# Close the PDF device
#dev.off()
```