{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Installation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`devtools::install_github(\"zji90/SCRATdatahg19\")` \n",
"`source(\"https://raw.githubusercontent.com/zji90/SCRATdata/master/installcode.R\")` "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Import packages"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading required package: BiocGenerics\n",
"Loading required package: parallel\n",
"\n",
"Attaching package: ‘BiocGenerics’\n",
"\n",
"The following objects are masked from ‘package:parallel’:\n",
"\n",
" clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\n",
" clusterExport, clusterMap, parApply, parCapply, parLapply,\n",
" parLapplyLB, parRapply, parSapply, parSapplyLB\n",
"\n",
"The following objects are masked from ‘package:stats’:\n",
"\n",
" IQR, mad, sd, var, xtabs\n",
"\n",
"The following objects are masked from ‘package:base’:\n",
"\n",
" anyDuplicated, append, as.data.frame, basename, cbind, colMeans,\n",
" colnames, colSums, dirname, do.call, duplicated, eval, evalq,\n",
" Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,\n",
" lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,\n",
" pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames,\n",
" rowSums, sapply, setdiff, sort, table, tapply, union, unique,\n",
" unsplit, which, which.max, which.min\n",
"\n",
"Loading required package: S4Vectors\n",
"Loading required package: stats4\n",
"\n",
"Attaching package: ‘S4Vectors’\n",
"\n",
"The following object is masked from ‘package:base’:\n",
"\n",
" expand.grid\n",
"\n",
"Loading required package: IRanges\n",
"Loading required package: GenomeInfoDb\n",
"Loading required package: GenomicRanges\n",
"Loading required package: SummarizedExperiment\n",
"Loading required package: Biobase\n",
"Welcome to Bioconductor\n",
"\n",
" Vignettes contain introductory material; view with\n",
" 'browseVignettes()'. To cite Bioconductor, see\n",
" 'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'.\n",
"\n",
"Loading required package: DelayedArray\n",
"Loading required package: matrixStats\n",
"\n",
"Attaching package: ‘matrixStats’\n",
"\n",
"The following objects are masked from ‘package:Biobase’:\n",
"\n",
" anyMissing, rowMedians\n",
"\n",
"Loading required package: BiocParallel\n",
"\n",
"Attaching package: ‘DelayedArray’\n",
"\n",
"The following objects are masked from ‘package:matrixStats’:\n",
"\n",
" colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges\n",
"\n",
"The following objects are masked from ‘package:base’:\n",
"\n",
" aperm, apply\n",
"\n",
"Loading required package: Biostrings\n",
"Loading required package: XVector\n",
"\n",
"Attaching package: ‘Biostrings’\n",
"\n",
"The following object is masked from ‘package:DelayedArray’:\n",
"\n",
" type\n",
"\n",
"The following object is masked from ‘package:base’:\n",
"\n",
" strsplit\n",
"\n",
"Loading required package: Rsamtools\n",
"Warning message:\n",
"“replacing previous import ‘DT::dataTableOutput’ by ‘shiny::dataTableOutput’ when loading ‘SCRAT’”Warning message:\n",
"“replacing previous import ‘DT::renderDataTable’ by ‘shiny::renderDataTable’ when loading ‘SCRAT’”Warning message:\n",
"“replacing previous import ‘mclust::em’ by ‘shiny::em’ when loading ‘SCRAT’”"
]
}
],
"source": [
"library(devtools)\n",
"library(GenomicAlignments)\n",
"library(Rsamtools)\n",
"library(SCRATdatahg19)\n",
"library(SCRAT)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Obtain Feature Matrix"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"start_time = Sys.time()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"metadata <- read.table('../../input/metadata.tsv',\n",
" header = TRUE,\n",
" stringsAsFactors=FALSE,quote=\"\",row.names=1)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"SCRATsummary <- function (dir = \"\", genome, bamfile = NULL, singlepair = \"automated\", \n",
" removeblacklist = T, log2transform = T, adjustlen = T, featurelist = c(\"GENE\", \n",
" \"ENCL\", \"MOTIF_TRANSFAC\", \"MOTIF_JASPAR\", \"GSEA\"), customfeature = NULL, \n",
" Genestarttype = \"TSSup\", Geneendtype = \"TSSdown\", Genestartbp = 3000, \n",
" Geneendbp = 1000, ENCLclunum = 2000, Motifflank = 100, GSEAterm = \"c5.bp\", \n",
" GSEAstarttype = \"TSSup\", GSEAendtype = \"TSSdown\", GSEAstartbp = 3000, \n",
" GSEAendbp = 1000) \n",
"{\n",
" if (is.null(bamfile)) {\n",
" bamfile <- list.files(dir, pattern = \".bam$\")\n",
" }\n",
" datapath <- system.file(\"extdata\", package = paste0(\"SCRATdata\", \n",
" genome))\n",
" bamdata <- list()\n",
" for (i in bamfile) {\n",
" filepath <- file.path(dir, i)\n",
" if (singlepair == \"automated\") {\n",
" bamfile <- BamFile(filepath)\n",
" tmpsingle <- readGAlignments(bamfile)\n",
" tmppair <- readGAlignmentPairs(bamfile)\n",
" pairendtf <- testPairedEndBam(bamfile)\n",
" if (pairendtf) {\n",
" tmp <- tmppair\n",
" startpos <- pmin(start(first(tmp)), start(last(tmp)))\n",
" endpos <- pmax(end(first(tmp)), end(last(tmp)))\n",
" id <- which(!is.na(as.character(seqnames(tmp))))\n",
" tmp <- GRanges(seqnames=as.character(seqnames(tmp))[id],IRanges(start=startpos[id],end=endpos[id]))\n",
" }\n",
" else {\n",
" tmp <- GRanges(tmpsingle)\n",
" }\n",
" }\n",
" else if (singlepair == \"single\") {\n",
" tmp <- GRanges(readGAlignments(filepath))\n",
" }\n",
" else if (singlepair == \"pair\") {\n",
" tmp <- readGAlignmentPairs(filepath)\n",
" startpos <- pmin(start(first(tmp)), start(last(tmp)))\n",
" endpos <- pmax(end(first(tmp)), end(last(tmp)))\n",
" id <- which(!is.na(as.character(seqnames(tmp))))\n",
" tmp <- GRanges(seqnames=as.character(seqnames(tmp))[id],IRanges(start=startpos[id],end=endpos[id]))\n",
" }\n",
" if (removeblacklist) {\n",
" load(paste0(datapath, \"/gr/blacklist.rda\"))\n",
" tmp <- tmp[-as.matrix(findOverlaps(tmp, gr))[, 1], \n",
" ]\n",
" }\n",
" bamdata[[i]] <- tmp\n",
" }\n",
" bamsummary <- sapply(bamdata, length)\n",
" allres <- NULL\n",
" datapath <- system.file(\"extdata\", package = paste0(\"SCRATdata\", \n",
" genome))\n",
" if (\"GENE\" %in% featurelist) {\n",
" print(\"Processing GENE features\")\n",
" load(paste0(datapath, \"/gr/generegion.rda\"))\n",
" if (Genestarttype == \"TSSup\") {\n",
" grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
" start(gr) - as.numeric(Genestartbp), end(gr) + \n",
" as.numeric(Genestartbp))\n",
" }\n",
" else if (Genestarttype == \"TSSdown\") {\n",
" grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
" start(gr) + as.numeric(Genestartbp), end(gr) - \n",
" as.numeric(Genestartbp))\n",
" }\n",
" else if (Genestarttype == \"TESup\") {\n",
" grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
" end(gr) - as.numeric(Genestartbp), start(gr) + \n",
" as.numeric(Genestartbp))\n",
" }\n",
" else if (Genestarttype == \"TESdown\") {\n",
" grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
" end(gr) + as.numeric(Genestartbp), start(gr) - \n",
" as.numeric(Genestartbp))\n",
" }\n",
" if (Geneendtype == \"TSSup\") {\n",
" grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
" start(gr) - as.numeric(Geneendbp), end(gr) + \n",
" as.numeric(Geneendbp))\n",
" }\n",
" else if (Geneendtype == \"TSSdown\") {\n",
" grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
" start(gr) + as.numeric(Geneendbp), end(gr) - \n",
" as.numeric(Geneendbp))\n",
" }\n",
" else if (Geneendtype == \"TESup\") {\n",
" grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
" end(gr) - as.numeric(Geneendbp), start(gr) + \n",
" as.numeric(Geneendbp))\n",
" }\n",
" else if (Geneendtype == \"TESdown\") {\n",
" grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
" end(gr) + as.numeric(Geneendbp), start(gr) - \n",
" as.numeric(Geneendbp))\n",
" }\n",
" ngr <- names(gr)\n",
" gr <- GRanges(seqnames = seqnames(gr), IRanges(start = pmin(grstart, \n",
" grend), end = pmax(grstart, grend)))\n",
" names(gr) <- ngr\n",
" tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
" i))\n",
" tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
" if (log2transform) {\n",
" tmp <- log2(tmp + 1)\n",
" }\n",
" if (adjustlen) {\n",
" grrange <- end(gr) - start(gr) + 1\n",
" tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
" }\n",
" tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
" allres <- rbind(allres, tmp)\n",
" }\n",
" if (\"ENCL\" %in% featurelist) {\n",
" print(\"Processing ENCL features\")\n",
" load(paste0(datapath, \"/gr/ENCL\", ENCLclunum, \".rda\"))\n",
" tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
" i))\n",
" tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
" if (log2transform) {\n",
" tmp <- log2(tmp + 1)\n",
" }\n",
" if (adjustlen) {\n",
" grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
" 1))\n",
" tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
" }\n",
" tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
" allres <- rbind(allres, tmp)\n",
" }\n",
" if (\"MOTIF_TRANSFAC\" %in% featurelist) {\n",
" print(\"Processing MOTIF_TRANSFAC features\")\n",
" load(paste0(datapath, \"/gr/transfac1.rda\"))\n",
" gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
" tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
" i))\n",
" tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
" if (log2transform) {\n",
" tmp <- log2(tmp + 1)\n",
" }\n",
" if (adjustlen) {\n",
" grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
" 1))\n",
" tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
" }\n",
" tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
" allres <- rbind(allres, tmp)\n",
" load(paste0(datapath, \"/gr/transfac2.rda\"))\n",
" gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
" tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
" i))\n",
" tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
" if (log2transform) {\n",
" tmp <- log2(tmp + 1)\n",
" }\n",
" if (adjustlen) {\n",
" grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
" 1))\n",
" tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
" }\n",
" tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
" allres <- rbind(allres, tmp)\n",
" if (genome %in% c(\"hg19\", \"hg38\")) {\n",
" load(paste0(datapath, \"/gr/transfac3.rda\"))\n",
" gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
" tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
" i))\n",
" tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
" if (log2transform) {\n",
" tmp <- log2(tmp + 1)\n",
" }\n",
" if (adjustlen) {\n",
" grrange <- sapply(gr, function(i) sum(end(i) - \n",
" start(i) + 1))\n",
" tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
" }\n",
" tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
" allres <- rbind(allres, tmp)\n",
" }\n",
" }\n",
" if (\"MOTIF_JASPAR\" %in% featurelist) {\n",
" print(\"Processing MOTIF_JASPAR features\")\n",
" load(paste0(datapath, \"/gr/jaspar1.rda\"))\n",
" gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
" tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
" i))\n",
" tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
" if (log2transform) {\n",
" tmp <- log2(tmp + 1)\n",
" }\n",
" if (adjustlen) {\n",
" grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
" 1))\n",
" tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
" }\n",
" tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
" allres <- rbind(allres, tmp)\n",
" load(paste0(datapath, \"/gr/jaspar2.rda\"))\n",
" gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
" tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
" i))\n",
" tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
" if (log2transform) {\n",
" tmp <- log2(tmp + 1)\n",
" }\n",
" if (adjustlen) {\n",
" grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
" 1))\n",
" tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
" }\n",
" tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
" allres <- rbind(allres, tmp)\n",
" }\n",
" if (\"GSEA\" %in% featurelist) {\n",
" print(\"Processing GSEA features\")\n",
" for (i in GSEAterm) {\n",
" load(paste0(datapath, \"/gr/GSEA\", i, \".rda\"))\n",
" allgr <- gr\n",
" for (sgrn in names(allgr)) {\n",
" gr <- allgr[[sgrn]]\n",
" if (GSEAstarttype == \"TSSup\") {\n",
" grstart <- ifelse(as.character(strand(gr)) == \n",
" \"+\", start(gr) - as.numeric(GSEAstartbp), \n",
" end(gr) + as.numeric(GSEAstartbp))\n",
" }\n",
" else if (GSEAstarttype == \"TSSdown\") {\n",
" grstart <- ifelse(as.character(strand(gr)) == \n",
" \"+\", start(gr) + as.numeric(GSEAstartbp), \n",
" end(gr) - as.numeric(GSEAstartbp))\n",
" }\n",
" else if (GSEAstarttype == \"TESup\") {\n",
" grstart <- ifelse(as.character(strand(gr)) == \n",
" \"+\", end(gr) - as.numeric(GSEAstartbp), start(gr) + \n",
" as.numeric(GSEAstartbp))\n",
" }\n",
" else if (GSEAstarttype == \"TESdown\") {\n",
" grstart <- ifelse(as.character(strand(gr)) == \n",
" \"+\", end(gr) + as.numeric(GSEAstartbp), start(gr) - \n",
" as.numeric(GSEAstartbp))\n",
" }\n",
" if (GSEAendtype == \"TSSup\") {\n",
" grend <- ifelse(as.character(strand(gr)) == \n",
" \"+\", start(gr) - as.numeric(GSEAendbp), end(gr) + \n",
" as.numeric(GSEAendbp))\n",
" }\n",
" else if (GSEAendtype == \"TSSdown\") {\n",
" grend <- ifelse(as.character(strand(gr)) == \n",
" \"+\", start(gr) + as.numeric(GSEAendbp), end(gr) - \n",
" as.numeric(GSEAendbp))\n",
" }\n",
" else if (GSEAendtype == \"TESup\") {\n",
" grend <- ifelse(as.character(strand(gr)) == \n",
" \"+\", end(gr) - as.numeric(GSEAendbp), start(gr) + \n",
" as.numeric(GSEAendbp))\n",
" }\n",
" else if (GSEAendtype == \"TESdown\") {\n",
" grend <- ifelse(as.character(strand(gr)) == \n",
" \"+\", end(gr) + as.numeric(GSEAendbp), start(gr) - \n",
" as.numeric(GSEAendbp))\n",
" }\n",
" ngr <- names(gr)\n",
" gr <- GRanges(seqnames = seqnames(gr), IRanges(start = pmin(grstart, \n",
" grend), end = pmax(grstart, grend)))\n",
" names(gr) <- ngr\n",
" allgr[[sgrn]] <- gr\n",
" }\n",
" gr <- allgr\n",
" tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
" i))\n",
" tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
" if (log2transform) {\n",
" tmp <- log2(tmp + 1)\n",
" }\n",
" if (adjustlen) {\n",
" grrange <- sapply(gr, function(i) sum(end(i) - \n",
" start(i) + 1))\n",
" tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
" }\n",
" tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
" allres <- rbind(allres, tmp)\n",
" }\n",
" }\n",
" if (\"Custom\" %in% featurelist) {\n",
" print(\"Processing custom features\")\n",
" gr <- read.table(customfeature, as.is = T, sep = \"\\t\")\n",
" gr <- GRanges(seqnames = gr[, 1], IRanges(start = gr[, \n",
" 2], end = gr[, 3]))\n",
" tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
" i))\n",
" tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
" if (log2transform) {\n",
" tmp <- log2(tmp + 1)\n",
" }\n",
" if (adjustlen) {\n",
" grrange <- end(gr) - start(gr) + 1\n",
" tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
" }\n",
" tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
" allres <- rbind(allres, tmp)\n",
" }\n",
" allres\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr1_gl000192_random, chr7_gl000195_random, chr8_gl000197_random, chr9_gl000201_random, chrUn_gl000220, chrUn_gl000240\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr7_gl000195_random, chr17_gl000205_random, chr19_gl000208_random, chrUn_gl000220, chrUn_gl000222\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr7_gl000195_random, chrUn_gl000219\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr17_gl000205_random\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr1_gl000191_random, chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr7_gl000195_random, chrUn_gl000215, chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrUn_gl000216, chrUn_gl000225\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrUn_gl000220, chrUn_gl000221, chrUn_gl000223\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000239\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrUn_gl000220, chrUn_gl000228\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr7_gl000195_random, chr17_gl000205_random, chr17_gl000206_random, chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr17_gl000205_random\n",
" - in 'y': chr21, chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr7_gl000195_random, chrUn_gl000219, chrUn_gl000220\n",
" - in 'y': chr15, chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr17_gl000205_random\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223, chrUn_gl000225\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr17_gl000205_random, chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr7_gl000195_random, chr11_gl000202_random, chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr17_gl000205_random, chrUn_gl000214, chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr7_gl000195_random, chrUn_gl000220, chrUn_gl000242\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrUn_gl000220\n",
" - in 'y': chr18\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr7_gl000195_random, chrUn_gl000220, chrUn_gl000225\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrUn_gl000220\n",
" - in 'y': chr14, chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr7_gl000195_random, chr9_gl000199_random, chr17_gl000205_random, chrUn_gl000212, chrUn_gl000219, chrUn_gl000220\n",
" - in 'y': chrY\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1] \"Processing MOTIF_JASPAR features\"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr1_gl000192_random, chr7_gl000195_random, chr8_gl000197_random, chr9_gl000201_random, chrUn_gl000220, chrUn_gl000240\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr7_gl000195_random, chr17_gl000205_random, chr19_gl000208_random, chrUn_gl000220, chrUn_gl000222\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr7_gl000195_random, chrUn_gl000219\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr17_gl000205_random\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr1_gl000191_random, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr7_gl000195_random, chrUn_gl000215, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chrUn_gl000216, chrUn_gl000225\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chrUn_gl000220, chrUn_gl000221, chrUn_gl000223\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000239\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chrUn_gl000220, chrUn_gl000228\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr7_gl000195_random, chr17_gl000205_random, chr17_gl000206_random, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr21, chrY\n",
" - in 'y': chrM, chr17_gl000205_random\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr15, chrY\n",
" - in 'y': chrM, chr7_gl000195_random, chrUn_gl000219, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr17_gl000205_random\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223, chrUn_gl000225\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr7_gl000195_random, chr11_gl000202_random, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr17_gl000205_random, chrUn_gl000214, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr7_gl000195_random, chrUn_gl000220, chrUn_gl000242\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr18\n",
" - in 'y': chrM, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr18, chrY\n",
" - in 'y': chrM\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrX, chrY\n",
" - in 'y': chrM\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr20, chrX, chrY\n",
" - in 'y': chrM\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr15\n",
" - in 'y': chrM\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr7_gl000195_random, chrUn_gl000220, chrUn_gl000225\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chr14, chrY\n",
" - in 'y': chrM, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n",
"“Each of the 2 combined objects has sequence levels not in the other:\n",
" - in 'x': chrY\n",
" - in 'y': chrM, chr7_gl000195_random, chr9_gl000199_random, chr17_gl000205_random, chrUn_gl000212, chrUn_gl000219, chrUn_gl000220\n",
" Make sure to always combine/compare objects based on the same reference\n",
" genome (use suppressWarnings() to suppress this warning).”"
]
}
],
"source": [
"df_out <- SCRATsummary(dir = \"../../input/sc-bams_nodup/\", \n",
" genome = \"hg19\",\n",
" featurelist=\"MOTIF_JASPAR\",\n",
" log2transform = FALSE, adjustlen = FALSE)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"end_time <- Sys.time()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Time difference of 6.359369 hours"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"end_time - start_time"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\t- 519
\n",
"\t- 2034
\n",
"
\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 519\n",
"\\item 2034\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 519\n",
"2. 2034\n",
"\n",
"\n"
],
"text/plain": [
"[1] 519 2034"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
" | BM1077-CLP-Frozen-160106-13.dedup.st.bam | BM1077-CLP-Frozen-160106-14.dedup.st.bam | BM1077-CLP-Frozen-160106-2.dedup.st.bam | BM1077-CLP-Frozen-160106-21.dedup.st.bam | BM1077-CLP-Frozen-160106-27.dedup.st.bam |
\n",
"\n",
"\tMOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |
\n",
"\tMOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |
\n",
"\tMOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |
\n",
"\tMOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |
\n",
"\tMOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |
\n",
"\n",
"
\n"
],
"text/latex": [
"\\begin{tabular}{r|lllll}\n",
" & BM1077-CLP-Frozen-160106-13.dedup.st.bam & BM1077-CLP-Frozen-160106-14.dedup.st.bam & BM1077-CLP-Frozen-160106-2.dedup.st.bam & BM1077-CLP-Frozen-160106-21.dedup.st.bam & BM1077-CLP-Frozen-160106-27.dedup.st.bam\\\\\n",
"\\hline\n",
"\tMOTIF:MA0002.2:RUNX1 & 1872.6592 & 1327.8576 & 1626.8382 & 1472.3032 & 2131.500 \\\\\n",
"\tMOTIF:MA0003.3:TFAP2A & 3489.9557 & 2505.1335 & 3281.2500 & 2736.8805 & 3790.288 \\\\\n",
"\tMOTIF:MA0004.1:Arnt & 1467.4838 & 1115.6742 & 1484.3750 & 1169.8251 & 1443.919 \\\\\n",
"\tMOTIF:MA0006.1:Ahr::Arnt & 2659.1760 & 2060.2327 & 2693.0147 & 2099.1254 & 2819.080 \\\\\n",
"\tMOTIF:MA0007.3:Ar & 146.4079 & 102.6694 & 119.4853 & 116.6181 & 124.624 \\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"| | BM1077-CLP-Frozen-160106-13.dedup.st.bam | BM1077-CLP-Frozen-160106-14.dedup.st.bam | BM1077-CLP-Frozen-160106-2.dedup.st.bam | BM1077-CLP-Frozen-160106-21.dedup.st.bam | BM1077-CLP-Frozen-160106-27.dedup.st.bam |\n",
"|---|---|---|---|---|---|\n",
"| MOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |\n",
"| MOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |\n",
"| MOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |\n",
"| MOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |\n",
"| MOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |\n",
"\n"
],
"text/plain": [
" BM1077-CLP-Frozen-160106-13.dedup.st.bam\n",
"MOTIF:MA0002.2:RUNX1 1872.6592 \n",
"MOTIF:MA0003.3:TFAP2A 3489.9557 \n",
"MOTIF:MA0004.1:Arnt 1467.4838 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2659.1760 \n",
"MOTIF:MA0007.3:Ar 146.4079 \n",
" BM1077-CLP-Frozen-160106-14.dedup.st.bam\n",
"MOTIF:MA0002.2:RUNX1 1327.8576 \n",
"MOTIF:MA0003.3:TFAP2A 2505.1335 \n",
"MOTIF:MA0004.1:Arnt 1115.6742 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2060.2327 \n",
"MOTIF:MA0007.3:Ar 102.6694 \n",
" BM1077-CLP-Frozen-160106-2.dedup.st.bam\n",
"MOTIF:MA0002.2:RUNX1 1626.8382 \n",
"MOTIF:MA0003.3:TFAP2A 3281.2500 \n",
"MOTIF:MA0004.1:Arnt 1484.3750 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2693.0147 \n",
"MOTIF:MA0007.3:Ar 119.4853 \n",
" BM1077-CLP-Frozen-160106-21.dedup.st.bam\n",
"MOTIF:MA0002.2:RUNX1 1472.3032 \n",
"MOTIF:MA0003.3:TFAP2A 2736.8805 \n",
"MOTIF:MA0004.1:Arnt 1169.8251 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2099.1254 \n",
"MOTIF:MA0007.3:Ar 116.6181 \n",
" BM1077-CLP-Frozen-160106-27.dedup.st.bam\n",
"MOTIF:MA0002.2:RUNX1 2131.500 \n",
"MOTIF:MA0003.3:TFAP2A 3790.288 \n",
"MOTIF:MA0004.1:Arnt 1443.919 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2819.080 \n",
"MOTIF:MA0007.3:Ar 124.624 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"dim(df_out)\n",
"df_out[1:5,1:5]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\t- 519
\n",
"\t- 2034
\n",
"
\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 519\n",
"\\item 2034\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 519\n",
"2. 2034\n",
"\n",
"\n"
],
"text/plain": [
"[1] 519 2034"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
" | BM1077-CLP-Frozen-160106-13 | BM1077-CLP-Frozen-160106-14 | BM1077-CLP-Frozen-160106-2 | BM1077-CLP-Frozen-160106-21 | BM1077-CLP-Frozen-160106-27 |
\n",
"\n",
"\tMOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |
\n",
"\tMOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |
\n",
"\tMOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |
\n",
"\tMOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |
\n",
"\tMOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |
\n",
"\n",
"
\n"
],
"text/latex": [
"\\begin{tabular}{r|lllll}\n",
" & BM1077-CLP-Frozen-160106-13 & BM1077-CLP-Frozen-160106-14 & BM1077-CLP-Frozen-160106-2 & BM1077-CLP-Frozen-160106-21 & BM1077-CLP-Frozen-160106-27\\\\\n",
"\\hline\n",
"\tMOTIF:MA0002.2:RUNX1 & 1872.6592 & 1327.8576 & 1626.8382 & 1472.3032 & 2131.500 \\\\\n",
"\tMOTIF:MA0003.3:TFAP2A & 3489.9557 & 2505.1335 & 3281.2500 & 2736.8805 & 3790.288 \\\\\n",
"\tMOTIF:MA0004.1:Arnt & 1467.4838 & 1115.6742 & 1484.3750 & 1169.8251 & 1443.919 \\\\\n",
"\tMOTIF:MA0006.1:Ahr::Arnt & 2659.1760 & 2060.2327 & 2693.0147 & 2099.1254 & 2819.080 \\\\\n",
"\tMOTIF:MA0007.3:Ar & 146.4079 & 102.6694 & 119.4853 & 116.6181 & 124.624 \\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"| | BM1077-CLP-Frozen-160106-13 | BM1077-CLP-Frozen-160106-14 | BM1077-CLP-Frozen-160106-2 | BM1077-CLP-Frozen-160106-21 | BM1077-CLP-Frozen-160106-27 |\n",
"|---|---|---|---|---|---|\n",
"| MOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |\n",
"| MOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |\n",
"| MOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |\n",
"| MOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |\n",
"| MOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |\n",
"\n"
],
"text/plain": [
" BM1077-CLP-Frozen-160106-13\n",
"MOTIF:MA0002.2:RUNX1 1872.6592 \n",
"MOTIF:MA0003.3:TFAP2A 3489.9557 \n",
"MOTIF:MA0004.1:Arnt 1467.4838 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2659.1760 \n",
"MOTIF:MA0007.3:Ar 146.4079 \n",
" BM1077-CLP-Frozen-160106-14 BM1077-CLP-Frozen-160106-2\n",
"MOTIF:MA0002.2:RUNX1 1327.8576 1626.8382 \n",
"MOTIF:MA0003.3:TFAP2A 2505.1335 3281.2500 \n",
"MOTIF:MA0004.1:Arnt 1115.6742 1484.3750 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2060.2327 2693.0147 \n",
"MOTIF:MA0007.3:Ar 102.6694 119.4853 \n",
" BM1077-CLP-Frozen-160106-21\n",
"MOTIF:MA0002.2:RUNX1 1472.3032 \n",
"MOTIF:MA0003.3:TFAP2A 2736.8805 \n",
"MOTIF:MA0004.1:Arnt 1169.8251 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2099.1254 \n",
"MOTIF:MA0007.3:Ar 116.6181 \n",
" BM1077-CLP-Frozen-160106-27\n",
"MOTIF:MA0002.2:RUNX1 2131.500 \n",
"MOTIF:MA0003.3:TFAP2A 3790.288 \n",
"MOTIF:MA0004.1:Arnt 1443.919 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2819.080 \n",
"MOTIF:MA0007.3:Ar 124.624 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"colnames(df_out) = sapply(strsplit(colnames(df_out), \"\\\\.\"),'[',1)\n",
"dim(df_out)\n",
"df_out[1:5,1:5]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"if(! all(colnames(df_out) == rownames(metadata))){\n",
" df_out = df_out[,rownames(metadata)]\n",
" dim(df_out)\n",
" df_out[1:5,1:5]\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\t- 519
\n",
"\t- 2034
\n",
"
\n"
],
"text/latex": [
"\\begin{enumerate*}\n",
"\\item 519\n",
"\\item 2034\n",
"\\end{enumerate*}\n"
],
"text/markdown": [
"1. 519\n",
"2. 2034\n",
"\n",
"\n"
],
"text/plain": [
"[1] 519 2034"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
" | BM1077-CLP-Frozen-160106-13 | BM1077-CLP-Frozen-160106-14 | BM1077-CLP-Frozen-160106-2 | BM1077-CLP-Frozen-160106-21 | BM1077-CLP-Frozen-160106-27 |
\n",
"\n",
"\tMOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |
\n",
"\tMOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |
\n",
"\tMOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |
\n",
"\tMOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |
\n",
"\tMOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |
\n",
"\n",
"
\n"
],
"text/latex": [
"\\begin{tabular}{r|lllll}\n",
" & BM1077-CLP-Frozen-160106-13 & BM1077-CLP-Frozen-160106-14 & BM1077-CLP-Frozen-160106-2 & BM1077-CLP-Frozen-160106-21 & BM1077-CLP-Frozen-160106-27\\\\\n",
"\\hline\n",
"\tMOTIF:MA0002.2:RUNX1 & 1872.6592 & 1327.8576 & 1626.8382 & 1472.3032 & 2131.500 \\\\\n",
"\tMOTIF:MA0003.3:TFAP2A & 3489.9557 & 2505.1335 & 3281.2500 & 2736.8805 & 3790.288 \\\\\n",
"\tMOTIF:MA0004.1:Arnt & 1467.4838 & 1115.6742 & 1484.3750 & 1169.8251 & 1443.919 \\\\\n",
"\tMOTIF:MA0006.1:Ahr::Arnt & 2659.1760 & 2060.2327 & 2693.0147 & 2099.1254 & 2819.080 \\\\\n",
"\tMOTIF:MA0007.3:Ar & 146.4079 & 102.6694 & 119.4853 & 116.6181 & 124.624 \\\\\n",
"\\end{tabular}\n"
],
"text/markdown": [
"\n",
"| | BM1077-CLP-Frozen-160106-13 | BM1077-CLP-Frozen-160106-14 | BM1077-CLP-Frozen-160106-2 | BM1077-CLP-Frozen-160106-21 | BM1077-CLP-Frozen-160106-27 |\n",
"|---|---|---|---|---|---|\n",
"| MOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |\n",
"| MOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |\n",
"| MOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |\n",
"| MOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |\n",
"| MOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |\n",
"\n"
],
"text/plain": [
" BM1077-CLP-Frozen-160106-13\n",
"MOTIF:MA0002.2:RUNX1 1872.6592 \n",
"MOTIF:MA0003.3:TFAP2A 3489.9557 \n",
"MOTIF:MA0004.1:Arnt 1467.4838 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2659.1760 \n",
"MOTIF:MA0007.3:Ar 146.4079 \n",
" BM1077-CLP-Frozen-160106-14 BM1077-CLP-Frozen-160106-2\n",
"MOTIF:MA0002.2:RUNX1 1327.8576 1626.8382 \n",
"MOTIF:MA0003.3:TFAP2A 2505.1335 3281.2500 \n",
"MOTIF:MA0004.1:Arnt 1115.6742 1484.3750 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2060.2327 2693.0147 \n",
"MOTIF:MA0007.3:Ar 102.6694 119.4853 \n",
" BM1077-CLP-Frozen-160106-21\n",
"MOTIF:MA0002.2:RUNX1 1472.3032 \n",
"MOTIF:MA0003.3:TFAP2A 2736.8805 \n",
"MOTIF:MA0004.1:Arnt 1169.8251 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2099.1254 \n",
"MOTIF:MA0007.3:Ar 116.6181 \n",
" BM1077-CLP-Frozen-160106-27\n",
"MOTIF:MA0002.2:RUNX1 2131.500 \n",
"MOTIF:MA0003.3:TFAP2A 3790.288 \n",
"MOTIF:MA0004.1:Arnt 1443.919 \n",
"MOTIF:MA0006.1:Ahr::Arnt 2819.080 \n",
"MOTIF:MA0007.3:Ar 124.624 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"dim(df_out)\n",
"df_out[1:5,1:5]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"saveRDS(df_out, file = '../../output/feature_matrices/FM_SCRAT_buenrostro2018_motifs.rds')"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"R version 3.5.1 (2018-07-02)\n",
"Platform: x86_64-conda_cos6-linux-gnu (64-bit)\n",
"Running under: CentOS Linux 7 (Core)\n",
"\n",
"Matrix products: default\n",
"BLAS/LAPACK: /data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_SCRAT/lib/R/lib/libRblas.so\n",
"\n",
"locale:\n",
" [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C \n",
" [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 \n",
" [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 \n",
" [7] LC_PAPER=en_US.UTF-8 LC_NAME=C \n",
" [9] LC_ADDRESS=C LC_TELEPHONE=C \n",
"[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C \n",
"\n",
"attached base packages:\n",
"[1] stats4 parallel stats graphics grDevices utils datasets \n",
"[8] methods base \n",
"\n",
"other attached packages:\n",
" [1] SCRAT_0.99.0 SCRATdatahg19_0.99.1 \n",
" [3] GenomicAlignments_1.18.1 Rsamtools_1.34.0 \n",
" [5] Biostrings_2.50.2 XVector_0.22.0 \n",
" [7] SummarizedExperiment_1.12.0 DelayedArray_0.8.0 \n",
" [9] BiocParallel_1.16.6 matrixStats_0.54.0 \n",
"[11] Biobase_2.42.0 GenomicRanges_1.34.0 \n",
"[13] GenomeInfoDb_1.18.1 IRanges_2.16.0 \n",
"[15] S4Vectors_0.20.1 BiocGenerics_0.28.0 \n",
"[17] usethis_1.5.0 devtools_2.0.2 \n",
"\n",
"loaded via a namespace (and not attached):\n",
" [1] tsne_0.1-3 bitops_1.0-6 fs_1.2.7 \n",
" [4] RColorBrewer_1.1-2 rprojroot_1.3-2 repr_0.19.2 \n",
" [7] tools_3.5.1 backports_1.1.4 R6_2.4.0 \n",
"[10] DT_0.5 KernSmooth_2.23-15 lazyeval_0.2.2 \n",
"[13] colorspace_1.4-1 withr_2.1.2 tidyselect_0.2.5 \n",
"[16] prettyunits_1.0.2 processx_3.3.0 compiler_3.5.1 \n",
"[19] cli_1.1.0 desc_1.2.0 caTools_1.17.1.2 \n",
"[22] scales_1.0.0 callr_3.2.0 pbdZMQ_0.3-3 \n",
"[25] stringr_1.4.0 digest_0.6.18 shinyBS_0.61 \n",
"[28] scatterD3_0.9 dbscan_1.1-3 base64enc_0.1-3 \n",
"[31] pkgconfig_2.0.2 htmltools_0.3.6 sessioninfo_1.1.1 \n",
"[34] htmlwidgets_1.3 rlang_0.3.4 shiny_1.3.2 \n",
"[37] jsonlite_1.6 mclust_5.4.3 gtools_3.8.1 \n",
"[40] dplyr_0.8.0.1 RCurl_1.95-4.11 magrittr_1.5 \n",
"[43] GenomeInfoDbData_1.2.0 Matrix_1.2-17 Rcpp_1.0.1 \n",
"[46] IRkernel_0.8.15 munsell_0.5.0 stringi_1.4.3 \n",
"[49] zlibbioc_1.28.0 pkgbuild_1.0.3 gplots_3.0.1.1 \n",
"[52] plyr_1.8.4 grid_3.5.1 gdata_2.18.0 \n",
"[55] promises_1.0.1 crayon_1.3.4 lattice_0.20-38 \n",
"[58] IRdisplay_0.7.0 ps_1.3.0 pillar_1.3.1 \n",
"[61] uuid_0.1-2 reshape2_1.4.3 pkgload_1.0.2 \n",
"[64] glue_1.3.1 evaluate_0.13 remotes_2.0.4 \n",
"[67] httpuv_1.5.1 gtable_0.3.0 purrr_0.3.2 \n",
"[70] assertthat_0.2.1 ggplot2_3.1.1 mime_0.6 \n",
"[73] xtable_1.8-4 later_0.8.0 tibble_2.1.1 \n",
"[76] pheatmap_1.0.12 memoise_1.1.0 ellipse_0.4.1 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sessionInfo()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"save.image(file = 'SCRAT_buenrostro2018.RData')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R [conda env:ATACseq_SCRAT]",
"language": "R",
"name": "conda-env-ATACseq_SCRAT-r"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "3.5.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}