setwd("~/rotation2/") # Mitochondrial genes # pull a full list of corresponding Ensembl gene IDs and symbols #BiocManager::install("biomaRt") library(biomaRt) hsapiens_genes <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol"), mart = useMart("ensembl", dataset = "hsapiens_gene_ensembl")) write.table(hsapiens_genes, file="../Morris_2021/hsapiens_genes.csv", sep=",", row.names = FALSE) rm(hsapiens_genes) # trim features library(tidyr) gene_names <- read.table("../Morris_2021/GSM5225857_cDNA.features.txt") gene_names <- separate(gene_names, "V1", into=c("gene","version"), sep="[.]") gene_names <- gene_names[,"gene"] writeLines(gene_names, "../Morris_2021/cDNA_test.txt") rm(gene_names) # gene id to symbol conversion library(expss) library(dplyr) feature <- read.table("../Morris_2021/cDNA_test.txt") colnames(feature) <- c("id") ref <- read.csv("../Morris_2021/hsapiens_genes.csv", na.string="") colnames(ref) <- c("id","symbol") ref <- mutate(ref, dup=duplicated(ref[,"id"])) ref <- filter(ref, dup=="FALSE") ref <- ref[,1:2] feature <- add_columns(feature, ref, by=c("id")) feature$gene <- ifelse(is.na(feature$symbol), feature$id, feature$symbol) writeLines(feature$gene, "../Morris_2021/cDNA_test_feature.txt") rm(feature,ref) # correct for MT psudogenes gene_card <- read.csv("../Morris_2021/GeneCards-SearchResults.csv")[,1:2] # searched "Mitochondrially Encoded" on Gene Cards and downloaded all genes colnames(gene_card) <- c("symbol","description") gene_card <- filter(gene_card, grepl('Mitochondrially Encoded', description) | grepl('MT-', description) | grepl('MT-', symbol)) feature <- read.csv("../Morris_2021/cDNA_test_feature.txt",header = FALSE) colnames(feature) <- c("symbol") feature <- add_columns(feature, gene_card, by="symbol") feature <- mutate(feature, symbol2=(ifelse(!is.na(feature$description), paste0("MT-",feature$symbol), feature$symbol))) feature$symbol2 <- gsub('MT-MT-','MT-',feature$symbol2) writeLines(feature$symbol2, "../Morris_2021/cDNA_test_feature_corrected.txt") rm(feature,gene_card) ##################################################################################### # cDNA (filter by UMI and percent-mito) library(Seurat) Expression <- Seurat::ReadMtx(mtx = "../Morris_2021/GSM5225857_cDNA.mtx", features = "../Morris_2021/cDNA_test_feature_corrected.txt", cells = "../Morris_2021/GSM5225857_cDNA.barcodes.txt", cell.column=1, feature.column=1, mtx.transpose=T) Expression <- Seurat::CreateSeuratObject(counts = Expression, project = "STING-seq") Expression[["percent.mt"]] <- Seurat::PercentageFeatureSet(Expression, pattern = "^MT-") # plot before QC filename <- "./analysis/cache/5_violin_before_QC.tif" tiff(file=filename, width=8, height=5, units="in", res=1200) Seurat::VlnPlot(Expression, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) dev.off() filename <- "./analysis/cache/6_percent_mt_before_QC.tif" tiff(file=filename, width=8, height=5, units="in", res=1200) Seurat::FeatureScatter(Expression, feature1 = "nCount_RNA", feature2 = "percent.mt") dev.off() filename <- "./analysis/cache/7_nFeature_before_QC.tif" tiff(file=filename, width=8, height=5, units="in", res=1200) Seurat::FeatureScatter(Expression, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") dev.off() Expression <- subset(Expression, subset = nFeature_RNA >= 850 & percent.mt < 20) median_genes_per_cell <- median(Expression@meta.data$nFeature_RNA) dim(Expression) median_genes_per_cell ##################################################################################### # HTO (filter by UMI and demuxed droplets/singlets/multiplets) library(Seurat) Hashtag <- Seurat::ReadMtx(mtx = "../Morris_2021/GSM5225859_HTO.mtx", features = "../Morris_2021/GSM5225859_HTO.features.txt", cells = "../Morris_2021/GSM5225859_HTO.barcodes.txt", cell.column=1, feature.column=1, mtx.transpose=T) Hashtag <- Seurat::CreateSeuratObject(counts = Hashtag, project = "STING-seq") Hashtag <- subset(Hashtag, subset = nFeature_RNA >= 1 & nCount_RNA < 2500) Hashtag <- subset(x = Hashtag, cells = colnames(Hashtag)[(colnames(Hashtag) %in% c(colnames(Expression)))]) # retain only cells in Expression dim(Hashtag) Hashtag <- Seurat::NormalizeData(Hashtag, normalization.method = "CLR", scale.factor = 10000, margin = 2) # CLR across cells Hashtag <- Seurat::HTODemux(Hashtag, assay = "RNA", positive.quantile = 0.91) # find singlets (I don't know how to adjust parameters for this) Hashtag <- subset(x = Hashtag, RNA_classification.global == "Singlet") dim(Hashtag) ##################################################################################### # GDO (filter by UMI) library(Seurat) gRNA <- Seurat::ReadMtx(mtx = "../Morris_2021/GSM5225858_GDO.mtx", features = "../Morris_2021/GSM5225858_GDO.features.txt", cells = "../Morris_2021/GSM5225858_GDO.barcodes.txt", cell.column=1, feature.column=1, mtx.transpose=T) gRNA <- Seurat::CreateSeuratObject(counts = gRNA, project = "STING-seq") gRNA <- subset(x = gRNA, cells = colnames(gRNA)[(colnames(gRNA) %in% c(colnames(Hashtag)))]) # retain only cells in Hashtag gRNA <- subset(gRNA, subset = nCount_RNA >= 5 & nCount_RNA <= 15000) dim(gRNA) ##################################################################################### # plot after QC Expression <- subset(x = Expression, cells = colnames(Expression)[(colnames(Expression) %in% c(colnames(gRNA)))]) # retain only cells in gRNA dim(Expression) Hashtag <- subset(x = Hashtag, cells = colnames(Hashtag)[(colnames(Hashtag) %in% c(colnames(gRNA)))]) # retain only cells in gRNA dim(Hashtag) dim(gRNA) filename <- "./analysis/cache/8_violin_after_QC.tif" tiff(file=filename, width=8, height=5, units="in", res=1200) Seurat::VlnPlot(Expression, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3) dev.off() filename <- "./analysis/cache/9_percent_mt_after_QC.tif" tiff(file=filename, width=8, height=5, units="in", res=1200) Seurat::FeatureScatter(Expression, feature1 = "nCount_RNA", feature2 = "percent.mt") dev.off() filename <- "./analysis/cache/10_nFeature_after_QC.tif" tiff(file=filename, width=8, height=5, units="in", res=1200) Seurat::FeatureScatter(Expression, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") dev.off() ##################################################################################### # save filtered Seurat objects as RDS for future processing dir.create("../Morris_2021/filter", showWarnings = F) saveRDS(Expression, "../Morris_2021/filter/Expression.rds") saveRDS(Hashtag, "../Morris_2021/filter/Hashtag.rds") saveRDS(gRNA, "../Morris_2021/filter/gRNAg.rds") writeLines(gRNA@assays[["RNA"]]@counts@Dimnames[[2]], "../Morris_2021/QC_by_hang.txt") rm(list = ls())