---
title: "01_integration_Seurat.v5_47_samples"
output: html_document
date: "2024-09-18"
---

```{r}
library(Seurat)
```

```{r}
# Collect every per-sample Seurat object saved as .rds in the input directory.
file_paths <- list.files(
  path = "./data/individual_seurat_objs/",
  pattern = "\\.rds$",
  full.names = TRUE
)

# Sort so the sample order (and therefore the merge order) is reproducible.
file_paths <- sort(file_paths)

# merge() below needs one `x` object and at least one `y` object.
if (length(file_paths) < 2) {
  stop(
    "Expected at least two .rds files in ./data/individual_seurat_objs/, ",
    "found ", length(file_paths), ".",
    call. = FALSE
  )
}

# Read all objects into a single list. The merge step below consumes
# `ob.list`; the previous version created obj1, obj2, ... with assign() and
# never defined `ob.list`, so the merge chunk failed.
ob.list <- lapply(file_paths, readRDS)
```

```{r}
# Make sure the output directories exist before anything is written to them.
dir.create("./data/processed", showWarnings = FALSE, recursive = TRUE)
dir.create("./data/integrated", showWarnings = FALSE, recursive = TRUE)

# Merge all per-sample objects into one Seurat object.
obj <- merge(x = ob.list[[1]], y = ob.list[-1])
class(obj[["RNA"]])
saveRDS(obj, "./data/processed/obj.merged.132.rds")

# Set the default assay to RNA to avoid SCT-related issues
DefaultAssay(obj) <- "RNA"

# Standard Seurat pipeline on the merged, not yet integrated data.
obj <- NormalizeData(obj)
obj <- FindVariableFeatures(obj)
obj <- ScaleData(obj)
obj <- RunPCA(obj)
obj <- FindNeighbors(obj)
obj <- FindClusters(
  obj,
  resolution = 0.1,
  cluster.name = "unintegrated_clusters"
)
obj <- RunUMAP(obj, dims = 1:10)
saveRDS(obj, "./data/processed/obj.unintegrated.132.rds")

# Integration.
# Raise the size limit for globals exported by the future framework to 100 GiB.
options(future.globals.maxSize = 100 * 1024^3)
obj <- IntegrateLayers(
  object = obj,
  method = RPCAIntegration,
  orig.reduction = "pca",
  new.reduction = "integrated.rpca",
  verbose = FALSE
)

# Join the RNA assay layers again once integration is done.
obj[["RNA"]] <- JoinLayers(obj[["RNA"]])

# Cluster and embed on the integrated reduction.
obj <- FindNeighbors(obj, reduction = "integrated.rpca", dims = 1:30)
obj <- FindClusters(obj, resolution = 5.1, cluster.name = "rpca_clusters")
obj <- RunUMAP(
  obj,
  reduction = "integrated.rpca",
  dims = 1:30,
  reduction.name = "umap.rpca"
)
obj
saveRDS(obj, "./data/integrated/47.integrated.rds")
```

```{r, cell type annotation after prediction and cross-validation}
# Annotate the integrated clusters with major cell types.
Idents(obj) <- "rpca_clusters"

# Cluster IDs grouped by cell-type label. Entries are applied in order, so
# when an ID is listed under two labels the later label wins.
#
# NOTE(review): four IDs are listed twice; confirm the intended label:
#   82      -> 'Epithelium', then 'Erythroid' (final label: 'Erythroid')
#   91, 102 -> 'HPSC', then 'Erythroid'       (final label: 'Erythroid')
#   43      -> 'Treg committed naive CD4 T', then 'memory CD8 T'
#              (final label: 'memory CD8 T', so the former never appears)
# NOTE(review): clusters 77, 81, 104, 105 and 107 are not listed anywhere
# and keep their numeric ID as the label.
# NOTE(review): 'HPSC' may be a typo for 'HSPC' - confirm before changing it.
cluster_map <- list(
  "CD14hi Mono" = c(10, 19, 75, 76, 60, 97, 30, 24, 79, 42, 44, 20, 85, 11,
                    99),
  "CD16hi Mono" = c(34),
  "Mφ" = c(37, 101, 51, 67, 36),
  "cDC" = c(96, 21),
  "pDC" = c(94, 45),
  "OC" = c(90),
  "plasma" = c(87, 56),
  "immature B" = c(17, 57),
  "naive B" = c(0),
  "memory B" = c(12, 80, 84),
  "CLP" = c(74, 93),
  "CMP" = c(46, 89, 62),
  "HPSC" = c(73, 32, 91, 102, 106, 71),
  "Epithelium" = c(50, 35, 33, 53, 78, 88, 108, 38, 61, 55, 103, 92, 82, 69),
  "CAR" = c(59, 100),
  "OB" = c(52),
  "Fibroblast" = c(72),
  "Erythroid" = c(70, 82, 91, 102),
  "Pericyte" = c(98, 40),
  "Endothelium" = c(65),
  "CD16hi NK" = c(4),
  "naive CD4 T" = c(15, 18, 22, 26, 7, 83, 9),
  "memory/helper CD4 T" = c(14, 27, 3, 31, 8),
  "exhausting CD8 T" = c(1, 13, 2, 23, 25, 28, 5, 58, 6, 64, 95),
  "CD8 Teff" = c(16, 39, 49),
  "CD56hi NK" = c(48),
  "CD4 Treg" = c(29, 66, 86),
  "CD8 Tex" = c(47),
  "Treg committed naive CD4 T" = c(43),
  "memory CD8 T" = c(41, 43, 68),
  "CD16int CD56int NK" = c(54),
  "naive CD8 T" = c(63)
)

# Lookup table: one row per cluster ID (0-108). The label defaults to the
# cluster ID itself, stored as text.
celltype_C <- data.frame(
  ClusterID = 0:108,
  celltype_C = as.character(0:108)
)
for (label in names(cluster_map)) {
  in_group <- celltype_C$ClusterID %in% cluster_map[[label]]
  celltype_C$celltype_C[in_group] <- label
}

# Surface inconsistencies in the mapping instead of resolving them silently.
listed_ids <- unlist(cluster_map, use.names = FALSE)
dup_ids <- sort(unique(listed_ids[duplicated(listed_ids)]))
if (length(dup_ids) > 0) {
  warning(
    "Cluster IDs assigned to more than one cell type (last label wins): ",
    paste(dup_ids, collapse = ", "),
    call. = FALSE
  )
}
unlabelled_ids <- setdiff(celltype_C$ClusterID, listed_ids)
if (length(unlabelled_ids) > 0) {
  warning(
    "Cluster IDs without a cell-type label (numeric ID kept as label): ",
    paste(unlabelled_ids, collapse = ", "),
    call. = FALSE
  )
}

head(celltype_C)
celltype_C
table(celltype_C$celltype_C)

# Transfer the labels to the cells. Cells whose cluster is not in the lookup
# table get the literal string "NA", as before.
cell_cluster <- as.character(obj@meta.data$rpca_clusters)
lookup_row <- match(cell_cluster, as.character(celltype_C$ClusterID))
cell_label <- celltype_C$celltype_C[lookup_row]
cell_label[is.na(lookup_row)] <- "NA"
obj@meta.data$celltype_C <- cell_label
table(obj@meta.data$celltype_C)

# Overwrites the integrated object saved earlier, now including the labels.
saveRDS(obj, "./data/integrated/47.integrated.rds")
```

```{r, convert to .h5ad file format for analysis in Scanpy and Dynamo}
library(sceasy)
library(reticulate)

DefaultAssay(obj) <- "RNA"

# Coerce the RNA assay to class "Assay" before handing the object to sceasy.
obj[["RNA"]] <- as(obj[["RNA"]], "Assay")
sceasy::convertFormat(
  obj,
  from = "seurat",
  to = "anndata",
  outFile = "./data/integrated/47.integrated.h5ad"
)
```