{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Installation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`devtools::install_github(\"zji90/SCRATdatahg19\")` \n", "`source(\"https://raw.githubusercontent.com/zji90/SCRATdata/master/installcode.R\")` " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Import packages" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading required package: BiocGenerics\n", "Loading required package: parallel\n", "\n", "Attaching package: ‘BiocGenerics’\n", "\n", "The following objects are masked from ‘package:parallel’:\n", "\n", " clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\n", " clusterExport, clusterMap, parApply, parCapply, parLapply,\n", " parLapplyLB, parRapply, parSapply, parSapplyLB\n", "\n", "The following objects are masked from ‘package:stats’:\n", "\n", " IQR, mad, sd, var, xtabs\n", "\n", "The following objects are masked from ‘package:base’:\n", "\n", " anyDuplicated, append, as.data.frame, basename, cbind, colMeans,\n", " colnames, colSums, dirname, do.call, duplicated, eval, evalq,\n", " Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,\n", " lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,\n", " pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames,\n", " rowSums, sapply, setdiff, sort, table, tapply, union, unique,\n", " unsplit, which, which.max, which.min\n", "\n", "Loading required package: S4Vectors\n", "Loading required package: stats4\n", "\n", "Attaching package: ‘S4Vectors’\n", "\n", "The following object is masked from ‘package:base’:\n", "\n", " expand.grid\n", "\n", "Loading required package: IRanges\n", "Loading required package: GenomeInfoDb\n", "Loading required package: GenomicRanges\n", "Loading required 
package: SummarizedExperiment\n", "Loading required package: Biobase\n", "Welcome to Bioconductor\n", "\n", " Vignettes contain introductory material; view with\n", " 'browseVignettes()'. To cite Bioconductor, see\n", " 'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'.\n", "\n", "Loading required package: DelayedArray\n", "Loading required package: matrixStats\n", "\n", "Attaching package: ‘matrixStats’\n", "\n", "The following objects are masked from ‘package:Biobase’:\n", "\n", " anyMissing, rowMedians\n", "\n", "Loading required package: BiocParallel\n", "\n", "Attaching package: ‘DelayedArray’\n", "\n", "The following objects are masked from ‘package:matrixStats’:\n", "\n", " colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges\n", "\n", "The following objects are masked from ‘package:base’:\n", "\n", " aperm, apply\n", "\n", "Loading required package: Biostrings\n", "Loading required package: XVector\n", "\n", "Attaching package: ‘Biostrings’\n", "\n", "The following object is masked from ‘package:DelayedArray’:\n", "\n", " type\n", "\n", "The following object is masked from ‘package:base’:\n", "\n", " strsplit\n", "\n", "Loading required package: Rsamtools\n", "Warning message:\n", "“replacing previous import ‘DT::dataTableOutput’ by ‘shiny::dataTableOutput’ when loading ‘SCRAT’”Warning message:\n", "“replacing previous import ‘DT::renderDataTable’ by ‘shiny::renderDataTable’ when loading ‘SCRAT’”Warning message:\n", "“replacing previous import ‘mclust::em’ by ‘shiny::em’ when loading ‘SCRAT’”" ] } ], "source": [ "library(devtools)\n", "library(GenomicAlignments)\n", "library(Rsamtools)\n", "library(SCRATdatahg19)\n", "library(SCRAT)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Obtain Feature Matrix" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "start_time = Sys.time()" ] }, { 
"cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "metadata <- read.table('../../input/metadata.tsv',\n", " header = TRUE,\n", " stringsAsFactors=FALSE,quote=\"\",row.names=1)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "SCRATsummary <- function (dir = \"\", genome, bamfile = NULL, singlepair = \"automated\", \n", " removeblacklist = T, log2transform = T, adjustlen = T, featurelist = c(\"GENE\", \n", " \"ENCL\", \"MOTIF_TRANSFAC\", \"MOTIF_JASPAR\", \"GSEA\"), customfeature = NULL, \n", " Genestarttype = \"TSSup\", Geneendtype = \"TSSdown\", Genestartbp = 3000, \n", " Geneendbp = 1000, ENCLclunum = 2000, Motifflank = 100, GSEAterm = \"c5.bp\", \n", " GSEAstarttype = \"TSSup\", GSEAendtype = \"TSSdown\", GSEAstartbp = 3000, \n", " GSEAendbp = 1000) \n", "{\n", " if (is.null(bamfile)) {\n", " bamfile <- list.files(dir, pattern = \".bam$\")\n", " }\n", " datapath <- system.file(\"extdata\", package = paste0(\"SCRATdata\", \n", " genome))\n", " bamdata <- list()\n", " for (i in bamfile) {\n", " filepath <- file.path(dir, i)\n", " if (singlepair == \"automated\") {\n", " bamfile <- BamFile(filepath)\n", " tmpsingle <- readGAlignments(bamfile)\n", " tmppair <- readGAlignmentPairs(bamfile)\n", " pairendtf <- testPairedEndBam(bamfile)\n", " if (pairendtf) {\n", " tmp <- tmppair\n", " startpos <- pmin(start(first(tmp)), start(last(tmp)))\n", " endpos <- pmax(end(first(tmp)), end(last(tmp)))\n", " id <- which(!is.na(as.character(seqnames(tmp))))\n", " tmp <- GRanges(seqnames=as.character(seqnames(tmp))[id],IRanges(start=startpos[id],end=endpos[id]))\n", " }\n", " else {\n", " tmp <- GRanges(tmpsingle)\n", " }\n", " }\n", " else if (singlepair == \"single\") {\n", " tmp <- GRanges(readGAlignments(filepath))\n", " }\n", " else if (singlepair == \"pair\") {\n", " tmp <- readGAlignmentPairs(filepath)\n", " startpos <- pmin(start(first(tmp)), start(last(tmp)))\n", " endpos <- 
pmax(end(first(tmp)), end(last(tmp)))\n", " id <- which(!is.na(as.character(seqnames(tmp))))\n", " tmp <- GRanges(seqnames=as.character(seqnames(tmp))[id],IRanges(start=startpos[id],end=endpos[id]))\n", " }\n", " if (removeblacklist) {\n", " load(paste0(datapath, \"/gr/blacklist.rda\"))\n", " tmp <- tmp[-as.matrix(findOverlaps(tmp, gr))[, 1], \n", " ]\n", " }\n", " bamdata[[i]] <- tmp\n", " }\n", " bamsummary <- sapply(bamdata, length)\n", " allres <- NULL\n", " datapath <- system.file(\"extdata\", package = paste0(\"SCRATdata\", \n", " genome))\n", " if (\"GENE\" %in% featurelist) {\n", " print(\"Processing GENE features\")\n", " load(paste0(datapath, \"/gr/generegion.rda\"))\n", " if (Genestarttype == \"TSSup\") {\n", " grstart <- ifelse(as.character(strand(gr)) == \"+\", \n", " start(gr) - as.numeric(Genestartbp), end(gr) + \n", " as.numeric(Genestartbp))\n", " }\n", " else if (Genestarttype == \"TSSdown\") {\n", " grstart <- ifelse(as.character(strand(gr)) == \"+\", \n", " start(gr) + as.numeric(Genestartbp), end(gr) - \n", " as.numeric(Genestartbp))\n", " }\n", " else if (Genestarttype == \"TESup\") {\n", " grstart <- ifelse(as.character(strand(gr)) == \"+\", \n", " end(gr) - as.numeric(Genestartbp), start(gr) + \n", " as.numeric(Genestartbp))\n", " }\n", " else if (Genestarttype == \"TESdown\") {\n", " grstart <- ifelse(as.character(strand(gr)) == \"+\", \n", " end(gr) + as.numeric(Genestartbp), start(gr) - \n", " as.numeric(Genestartbp))\n", " }\n", " if (Geneendtype == \"TSSup\") {\n", " grend <- ifelse(as.character(strand(gr)) == \"+\", \n", " start(gr) - as.numeric(Geneendbp), end(gr) + \n", " as.numeric(Geneendbp))\n", " }\n", " else if (Geneendtype == \"TSSdown\") {\n", " grend <- ifelse(as.character(strand(gr)) == \"+\", \n", " start(gr) + as.numeric(Geneendbp), end(gr) - \n", " as.numeric(Geneendbp))\n", " }\n", " else if (Geneendtype == \"TESup\") {\n", " grend <- ifelse(as.character(strand(gr)) == \"+\", \n", " end(gr) - as.numeric(Geneendbp), 
start(gr) + \n", " as.numeric(Geneendbp))\n", " }\n", " else if (Geneendtype == \"TESdown\") {\n", " grend <- ifelse(as.character(strand(gr)) == \"+\", \n", " end(gr) + as.numeric(Geneendbp), start(gr) - \n", " as.numeric(Geneendbp))\n", " }\n", " ngr <- names(gr)\n", " gr <- GRanges(seqnames = seqnames(gr), IRanges(start = pmin(grstart, \n", " grend), end = pmax(grstart, grend)))\n", " names(gr) <- ngr\n", " tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n", " i))\n", " tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n", " if (log2transform) {\n", " tmp <- log2(tmp + 1)\n", " }\n", " if (adjustlen) {\n", " grrange <- end(gr) - start(gr) + 1\n", " tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n", " }\n", " tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n", " allres <- rbind(allres, tmp)\n", " }\n", " if (\"ENCL\" %in% featurelist) {\n", " print(\"Processing ENCL features\")\n", " load(paste0(datapath, \"/gr/ENCL\", ENCLclunum, \".rda\"))\n", " tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n", " i))\n", " tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n", " if (log2transform) {\n", " tmp <- log2(tmp + 1)\n", " }\n", " if (adjustlen) {\n", " grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n", " 1))\n", " tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n", " }\n", " tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n", " allres <- rbind(allres, tmp)\n", " }\n", " if (\"MOTIF_TRANSFAC\" %in% featurelist) {\n", " print(\"Processing MOTIF_TRANSFAC features\")\n", " load(paste0(datapath, \"/gr/transfac1.rda\"))\n", " gr <- flank(gr, as.numeric(Motifflank), both = T)\n", " tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n", " i))\n", " tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n", " if (log2transform) {\n", " tmp <- log2(tmp + 1)\n", " }\n", " if (adjustlen) {\n", " grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n", " 1))\n", " tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n", " }\n", " tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n", " allres <- 
rbind(allres, tmp)\n", " load(paste0(datapath, \"/gr/transfac2.rda\"))\n", " gr <- flank(gr, as.numeric(Motifflank), both = T)\n", " tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n", " i))\n", " tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n", " if (log2transform) {\n", " tmp <- log2(tmp + 1)\n", " }\n", " if (adjustlen) {\n", " grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n", " 1))\n", " tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n", " }\n", " tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n", " allres <- rbind(allres, tmp)\n", " if (genome %in% c(\"hg19\", \"hg38\")) {\n", " load(paste0(datapath, \"/gr/transfac3.rda\"))\n", " gr <- flank(gr, as.numeric(Motifflank), both = T)\n", " tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n", " i))\n", " tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n", " if (log2transform) {\n", " tmp <- log2(tmp + 1)\n", " }\n", " if (adjustlen) {\n", " grrange <- sapply(gr, function(i) sum(end(i) - \n", " start(i) + 1))\n", " tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n", " }\n", " tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n", " allres <- rbind(allres, tmp)\n", " }\n", " }\n", " if (\"MOTIF_JASPAR\" %in% featurelist) {\n", " print(\"Processing MOTIF_JASPAR features\")\n", " load(paste0(datapath, \"/gr/jaspar1.rda\"))\n", " gr <- flank(gr, as.numeric(Motifflank), both = T)\n", " tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n", " i))\n", " tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n", " if (log2transform) {\n", " tmp <- log2(tmp + 1)\n", " }\n", " if (adjustlen) {\n", " grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n", " 1))\n", " tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n", " }\n", " tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n", " allres <- rbind(allres, tmp)\n", " load(paste0(datapath, \"/gr/jaspar2.rda\"))\n", " gr <- flank(gr, as.numeric(Motifflank), both = T)\n", " tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n", " i))\n", " tmp <- sweep(tmp, 2, bamsummary, \"/\") * 
10000\n", " if (log2transform) {\n", " tmp <- log2(tmp + 1)\n", " }\n", " if (adjustlen) {\n", " grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n", " 1))\n", " tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n", " }\n", " tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n", " allres <- rbind(allres, tmp)\n", " }\n", " if (\"GSEA\" %in% featurelist) {\n", " print(\"Processing GSEA features\")\n", " for (i in GSEAterm) {\n", " load(paste0(datapath, \"/gr/GSEA\", i, \".rda\"))\n", " allgr <- gr\n", " for (sgrn in names(allgr)) {\n", " gr <- allgr[[sgrn]]\n", " if (GSEAstarttype == \"TSSup\") {\n", " grstart <- ifelse(as.character(strand(gr)) == \n", " \"+\", start(gr) - as.numeric(GSEAstartbp), \n", " end(gr) + as.numeric(GSEAstartbp))\n", " }\n", " else if (GSEAstarttype == \"TSSdown\") {\n", " grstart <- ifelse(as.character(strand(gr)) == \n", " \"+\", start(gr) + as.numeric(GSEAstartbp), \n", " end(gr) - as.numeric(GSEAstartbp))\n", " }\n", " else if (GSEAstarttype == \"TESup\") {\n", " grstart <- ifelse(as.character(strand(gr)) == \n", " \"+\", end(gr) - as.numeric(GSEAstartbp), start(gr) + \n", " as.numeric(GSEAstartbp))\n", " }\n", " else if (GSEAstarttype == \"TESdown\") {\n", " grstart <- ifelse(as.character(strand(gr)) == \n", " \"+\", end(gr) + as.numeric(GSEAstartbp), start(gr) - \n", " as.numeric(GSEAstartbp))\n", " }\n", " if (GSEAendtype == \"TSSup\") {\n", " grend <- ifelse(as.character(strand(gr)) == \n", " \"+\", start(gr) - as.numeric(GSEAendbp), end(gr) + \n", " as.numeric(GSEAendbp))\n", " }\n", " else if (GSEAendtype == \"TSSdown\") {\n", " grend <- ifelse(as.character(strand(gr)) == \n", " \"+\", start(gr) + as.numeric(GSEAendbp), end(gr) - \n", " as.numeric(GSEAendbp))\n", " }\n", " else if (GSEAendtype == \"TESup\") {\n", " grend <- ifelse(as.character(strand(gr)) == \n", " \"+\", end(gr) - as.numeric(GSEAendbp), start(gr) + \n", " as.numeric(GSEAendbp))\n", " }\n", " else if (GSEAendtype == \"TESdown\") {\n", " grend <- 
ifelse(as.character(strand(gr)) == \n", " \"+\", end(gr) + as.numeric(GSEAendbp), start(gr) - \n", " as.numeric(GSEAendbp))\n", " }\n", " ngr <- names(gr)\n", " gr <- GRanges(seqnames = seqnames(gr), IRanges(start = pmin(grstart, \n", " grend), end = pmax(grstart, grend)))\n", " names(gr) <- ngr\n", " allgr[[sgrn]] <- gr\n", " }\n", " gr <- allgr\n", " tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n", " i))\n", " tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n", " if (log2transform) {\n", " tmp <- log2(tmp + 1)\n", " }\n", " if (adjustlen) {\n", " grrange <- sapply(gr, function(i) sum(end(i) - \n", " start(i) + 1))\n", " tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n", " }\n", " tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n", " allres <- rbind(allres, tmp)\n", " }\n", " }\n", " if (\"Custom\" %in% featurelist) {\n", " print(\"Processing custom features\")\n", " gr <- read.table(customfeature, as.is = T, sep = \"\\t\")\n", " gr <- GRanges(seqnames = gr[, 1], IRanges(start = gr[, \n", " 2], end = gr[, 3]))\n", " tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n", " i))\n", " tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n", " if (log2transform) {\n", " tmp <- log2(tmp + 1)\n", " }\n", " if (adjustlen) {\n", " grrange <- end(gr) - start(gr) + 1\n", " tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n", " }\n", " tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n", " allres <- rbind(allres, tmp)\n", " }\n", " allres\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr1_gl000192_random, chr7_gl000195_random, chr8_gl000197_random, chr9_gl000201_random, chrUn_gl000220, chrUn_gl000240\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this 
warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr7_gl000195_random, chr17_gl000205_random, chr19_gl000208_random, chrUn_gl000220, chrUn_gl000222\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr7_gl000195_random, chrUn_gl000219\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr17_gl000205_random\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr1_gl000191_random, chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the 
same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr7_gl000195_random, chrUn_gl000215, chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrUn_gl000216, chrUn_gl000225\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrUn_gl000220, chrUn_gl000221, chrUn_gl000223\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000239\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr1_gl000192_random, chr17_gl000205_random, 
chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrUn_gl000220, chrUn_gl000228\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr7_gl000195_random, chr17_gl000205_random, chr17_gl000206_random, chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr17_gl000205_random\n", " - in 'y': chr21, chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr7_gl000195_random, chrUn_gl000219, chrUn_gl000220\n", " - in 'y': chr15, chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the 
other:\n", " - in 'x': chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr17_gl000205_random\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223, chrUn_gl000225\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr17_gl000205_random, chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr7_gl000195_random, chr11_gl000202_random, chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in 
the other:\n", " - in 'x': chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr17_gl000205_random, chrUn_gl000214, chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr7_gl000195_random, chrUn_gl000220, chrUn_gl000242\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrUn_gl000220\n", " - in 'y': chr18\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr7_gl000195_random, chrUn_gl000220, chrUn_gl000225\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in 
.Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrUn_gl000220\n", " - in 'y': chr14, chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr7_gl000195_random, chr9_gl000199_random, chr17_gl000205_random, chrUn_gl000212, chrUn_gl000219, chrUn_gl000220\n", " - in 'y': chrY\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[1] \"Processing MOTIF_JASPAR features\"\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr1_gl000192_random, chr7_gl000195_random, chr8_gl000197_random, chr9_gl000201_random, chrUn_gl000220, chrUn_gl000240\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr7_gl000195_random, chr17_gl000205_random, chr19_gl000208_random, chrUn_gl000220, chrUn_gl000222\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223\n", " Make sure to always combine/compare objects based on 
the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr7_gl000195_random, chrUn_gl000219\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr17_gl000205_random\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr1_gl000191_random, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr7_gl000195_random, chrUn_gl000215, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chrUn_gl000216, chrUn_gl000225\n", " Make sure to always combine/compare 
objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chrUn_gl000220, chrUn_gl000221, chrUn_gl000223\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000239\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr1_gl000192_random, chr17_gl000205_random, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chrUn_gl000220, chrUn_gl000228\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': 
chrY\n", " - in 'y': chrM, chr7_gl000195_random, chr17_gl000205_random, chr17_gl000206_random, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr21, chrY\n", " - in 'y': chrM, chr17_gl000205_random\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr15, chrY\n", " - in 'y': chrM, chr7_gl000195_random, chrUn_gl000219, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 
'x': chrY\n", " - in 'y': chrM, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr17_gl000205_random\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000223, chrUn_gl000225\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr7_gl000195_random, chr11_gl000202_random, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " genome (use suppressWarnings() to suppress this warning).”Warning message in 
.Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr17_gl000205_random, chrUn_gl000220, chrUn_gl000225\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr17_gl000205_random, chrUn_gl000214, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr7_gl000195_random, chrUn_gl000220, chrUn_gl000242\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr18\n", " - in 'y': chrM, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr18, chrY\n", " - in 'y': chrM\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrX, chrY\n", " - in 'y': chrM\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning 
message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr20, chrX, chrY\n", " - in 'y': chrM\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr15\n", " - in 'y': chrM\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr7_gl000195_random, chrUn_gl000220, chrUn_gl000225\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chr14, chrY\n", " - in 'y': chrM, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() to suppress this warning).”Warning message in .Seqinfo.mergexy(x, y):\n", "“Each of the 2 combined objects has sequence levels not in the other:\n", " - in 'x': chrY\n", " - in 'y': chrM, chr7_gl000195_random, chr9_gl000199_random, chr17_gl000205_random, chrUn_gl000212, chrUn_gl000219, chrUn_gl000220\n", " Make sure to always combine/compare objects based on the same reference\n", " genome (use suppressWarnings() 
to suppress this warning).”" ] } ], "source": [ "df_out <- SCRATsummary(dir = \"../../input/sc-bams_nodup/\", \n", " genome = \"hg19\",\n", " featurelist=\"MOTIF_JASPAR\",\n", " log2transform = FALSE, adjustlen = FALSE)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "end_time <- Sys.time()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Time difference of 6.359369 hours" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "end_time - start_time" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 12, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 519
  2. \n", "\t
  3. 2034
  4. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 519\n", "\\item 2034\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 519\n", "2. 2034\n", "\n", "\n" ], "text/plain": [ "[1] 519 2034" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
BM1077-CLP-Frozen-160106-13.dedup.st.bamBM1077-CLP-Frozen-160106-14.dedup.st.bamBM1077-CLP-Frozen-160106-2.dedup.st.bamBM1077-CLP-Frozen-160106-21.dedup.st.bamBM1077-CLP-Frozen-160106-27.dedup.st.bam
MOTIF:MA0002.2:RUNX11872.65921327.85761626.83821472.30322131.500
MOTIF:MA0003.3:TFAP2A3489.95572505.13353281.25002736.88053790.288
MOTIF:MA0004.1:Arnt1467.48381115.67421484.37501169.82511443.919
MOTIF:MA0006.1:Ahr::Arnt2659.17602060.23272693.01472099.12542819.080
MOTIF:MA0007.3:Ar 146.4079 102.6694 119.4853 116.6181 124.624
\n" ], "text/latex": [ "\\begin{tabular}{r|lllll}\n", " & BM1077-CLP-Frozen-160106-13.dedup.st.bam & BM1077-CLP-Frozen-160106-14.dedup.st.bam & BM1077-CLP-Frozen-160106-2.dedup.st.bam & BM1077-CLP-Frozen-160106-21.dedup.st.bam & BM1077-CLP-Frozen-160106-27.dedup.st.bam\\\\\n", "\\hline\n", "\tMOTIF:MA0002.2:RUNX1 & 1872.6592 & 1327.8576 & 1626.8382 & 1472.3032 & 2131.500 \\\\\n", "\tMOTIF:MA0003.3:TFAP2A & 3489.9557 & 2505.1335 & 3281.2500 & 2736.8805 & 3790.288 \\\\\n", "\tMOTIF:MA0004.1:Arnt & 1467.4838 & 1115.6742 & 1484.3750 & 1169.8251 & 1443.919 \\\\\n", "\tMOTIF:MA0006.1:Ahr::Arnt & 2659.1760 & 2060.2327 & 2693.0147 & 2099.1254 & 2819.080 \\\\\n", "\tMOTIF:MA0007.3:Ar & 146.4079 & 102.6694 & 119.4853 & 116.6181 & 124.624 \\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "| | BM1077-CLP-Frozen-160106-13.dedup.st.bam | BM1077-CLP-Frozen-160106-14.dedup.st.bam | BM1077-CLP-Frozen-160106-2.dedup.st.bam | BM1077-CLP-Frozen-160106-21.dedup.st.bam | BM1077-CLP-Frozen-160106-27.dedup.st.bam |\n", "|---|---|---|---|---|---|\n", "| MOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |\n", "| MOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |\n", "| MOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |\n", "| MOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |\n", "| MOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |\n", "\n" ], "text/plain": [ " BM1077-CLP-Frozen-160106-13.dedup.st.bam\n", "MOTIF:MA0002.2:RUNX1 1872.6592 \n", "MOTIF:MA0003.3:TFAP2A 3489.9557 \n", "MOTIF:MA0004.1:Arnt 1467.4838 \n", "MOTIF:MA0006.1:Ahr::Arnt 2659.1760 \n", "MOTIF:MA0007.3:Ar 146.4079 \n", " BM1077-CLP-Frozen-160106-14.dedup.st.bam\n", "MOTIF:MA0002.2:RUNX1 1327.8576 \n", "MOTIF:MA0003.3:TFAP2A 2505.1335 \n", "MOTIF:MA0004.1:Arnt 1115.6742 \n", "MOTIF:MA0006.1:Ahr::Arnt 2060.2327 \n", "MOTIF:MA0007.3:Ar 102.6694 \n", " 
BM1077-CLP-Frozen-160106-2.dedup.st.bam\n", "MOTIF:MA0002.2:RUNX1 1626.8382 \n", "MOTIF:MA0003.3:TFAP2A 3281.2500 \n", "MOTIF:MA0004.1:Arnt 1484.3750 \n", "MOTIF:MA0006.1:Ahr::Arnt 2693.0147 \n", "MOTIF:MA0007.3:Ar 119.4853 \n", " BM1077-CLP-Frozen-160106-21.dedup.st.bam\n", "MOTIF:MA0002.2:RUNX1 1472.3032 \n", "MOTIF:MA0003.3:TFAP2A 2736.8805 \n", "MOTIF:MA0004.1:Arnt 1169.8251 \n", "MOTIF:MA0006.1:Ahr::Arnt 2099.1254 \n", "MOTIF:MA0007.3:Ar 116.6181 \n", " BM1077-CLP-Frozen-160106-27.dedup.st.bam\n", "MOTIF:MA0002.2:RUNX1 2131.500 \n", "MOTIF:MA0003.3:TFAP2A 3790.288 \n", "MOTIF:MA0004.1:Arnt 1443.919 \n", "MOTIF:MA0006.1:Ahr::Arnt 2819.080 \n", "MOTIF:MA0007.3:Ar 124.624 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dim(df_out)\n", "df_out[1:5,1:5]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 519
  2. \n", "\t
  3. 2034
  4. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 519\n", "\\item 2034\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 519\n", "2. 2034\n", "\n", "\n" ], "text/plain": [ "[1] 519 2034" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
BM1077-CLP-Frozen-160106-13BM1077-CLP-Frozen-160106-14BM1077-CLP-Frozen-160106-2BM1077-CLP-Frozen-160106-21BM1077-CLP-Frozen-160106-27
MOTIF:MA0002.2:RUNX11872.65921327.85761626.83821472.30322131.500
MOTIF:MA0003.3:TFAP2A3489.95572505.13353281.25002736.88053790.288
MOTIF:MA0004.1:Arnt1467.48381115.67421484.37501169.82511443.919
MOTIF:MA0006.1:Ahr::Arnt2659.17602060.23272693.01472099.12542819.080
MOTIF:MA0007.3:Ar 146.4079 102.6694 119.4853 116.6181 124.624
\n" ], "text/latex": [ "\\begin{tabular}{r|lllll}\n", " & BM1077-CLP-Frozen-160106-13 & BM1077-CLP-Frozen-160106-14 & BM1077-CLP-Frozen-160106-2 & BM1077-CLP-Frozen-160106-21 & BM1077-CLP-Frozen-160106-27\\\\\n", "\\hline\n", "\tMOTIF:MA0002.2:RUNX1 & 1872.6592 & 1327.8576 & 1626.8382 & 1472.3032 & 2131.500 \\\\\n", "\tMOTIF:MA0003.3:TFAP2A & 3489.9557 & 2505.1335 & 3281.2500 & 2736.8805 & 3790.288 \\\\\n", "\tMOTIF:MA0004.1:Arnt & 1467.4838 & 1115.6742 & 1484.3750 & 1169.8251 & 1443.919 \\\\\n", "\tMOTIF:MA0006.1:Ahr::Arnt & 2659.1760 & 2060.2327 & 2693.0147 & 2099.1254 & 2819.080 \\\\\n", "\tMOTIF:MA0007.3:Ar & 146.4079 & 102.6694 & 119.4853 & 116.6181 & 124.624 \\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "| | BM1077-CLP-Frozen-160106-13 | BM1077-CLP-Frozen-160106-14 | BM1077-CLP-Frozen-160106-2 | BM1077-CLP-Frozen-160106-21 | BM1077-CLP-Frozen-160106-27 |\n", "|---|---|---|---|---|---|\n", "| MOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |\n", "| MOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |\n", "| MOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |\n", "| MOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |\n", "| MOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |\n", "\n" ], "text/plain": [ " BM1077-CLP-Frozen-160106-13\n", "MOTIF:MA0002.2:RUNX1 1872.6592 \n", "MOTIF:MA0003.3:TFAP2A 3489.9557 \n", "MOTIF:MA0004.1:Arnt 1467.4838 \n", "MOTIF:MA0006.1:Ahr::Arnt 2659.1760 \n", "MOTIF:MA0007.3:Ar 146.4079 \n", " BM1077-CLP-Frozen-160106-14 BM1077-CLP-Frozen-160106-2\n", "MOTIF:MA0002.2:RUNX1 1327.8576 1626.8382 \n", "MOTIF:MA0003.3:TFAP2A 2505.1335 3281.2500 \n", "MOTIF:MA0004.1:Arnt 1115.6742 1484.3750 \n", "MOTIF:MA0006.1:Ahr::Arnt 2060.2327 2693.0147 \n", "MOTIF:MA0007.3:Ar 102.6694 119.4853 \n", " BM1077-CLP-Frozen-160106-21\n", "MOTIF:MA0002.2:RUNX1 1472.3032 \n", "MOTIF:MA0003.3:TFAP2A 
2736.8805 \n", "MOTIF:MA0004.1:Arnt 1169.8251 \n", "MOTIF:MA0006.1:Ahr::Arnt 2099.1254 \n", "MOTIF:MA0007.3:Ar 116.6181 \n", " BM1077-CLP-Frozen-160106-27\n", "MOTIF:MA0002.2:RUNX1 2131.500 \n", "MOTIF:MA0003.3:TFAP2A 3790.288 \n", "MOTIF:MA0004.1:Arnt 1443.919 \n", "MOTIF:MA0006.1:Ahr::Arnt 2819.080 \n", "MOTIF:MA0007.3:Ar 124.624 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "colnames(df_out) = sapply(strsplit(colnames(df_out), \"\\\\.\"),'[',1)\n", "dim(df_out)\n", "df_out[1:5,1:5]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Align the columns of df_out with the cell order given by rownames(metadata).\n", "# Stop with a clear message when a metadata cell has no matching column.\n", "if(length(rownames(metadata)) == 0 || ! all(rownames(metadata) %in% colnames(df_out))){\n", "    stop('rownames(metadata) must be non-empty and all present in colnames(df_out)')\n", "}\n", "# identical() is length-safe; the element-wise == recycles when the lengths differ.\n", "if(! identical(colnames(df_out), rownames(metadata))){\n", "    df_out = df_out[,rownames(metadata)]\n", "    # Preview the reordered matrix (only shown when a reorder actually happened).\n", "    df_out[1:5,1:5]\n", "}" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
    \n", "\t
  1. 519
  2. \n", "\t
  3. 2034
  4. \n", "
\n" ], "text/latex": [ "\\begin{enumerate*}\n", "\\item 519\n", "\\item 2034\n", "\\end{enumerate*}\n" ], "text/markdown": [ "1. 519\n", "2. 2034\n", "\n", "\n" ], "text/plain": [ "[1] 519 2034" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n", "\n", "\t\n", "\t\n", "\t\n", "\t\n", "\t\n", "\n", "
BM1077-CLP-Frozen-160106-13BM1077-CLP-Frozen-160106-14BM1077-CLP-Frozen-160106-2BM1077-CLP-Frozen-160106-21BM1077-CLP-Frozen-160106-27
MOTIF:MA0002.2:RUNX11872.65921327.85761626.83821472.30322131.500
MOTIF:MA0003.3:TFAP2A3489.95572505.13353281.25002736.88053790.288
MOTIF:MA0004.1:Arnt1467.48381115.67421484.37501169.82511443.919
MOTIF:MA0006.1:Ahr::Arnt2659.17602060.23272693.01472099.12542819.080
MOTIF:MA0007.3:Ar 146.4079 102.6694 119.4853 116.6181 124.624
\n" ], "text/latex": [ "\\begin{tabular}{r|lllll}\n", " & BM1077-CLP-Frozen-160106-13 & BM1077-CLP-Frozen-160106-14 & BM1077-CLP-Frozen-160106-2 & BM1077-CLP-Frozen-160106-21 & BM1077-CLP-Frozen-160106-27\\\\\n", "\\hline\n", "\tMOTIF:MA0002.2:RUNX1 & 1872.6592 & 1327.8576 & 1626.8382 & 1472.3032 & 2131.500 \\\\\n", "\tMOTIF:MA0003.3:TFAP2A & 3489.9557 & 2505.1335 & 3281.2500 & 2736.8805 & 3790.288 \\\\\n", "\tMOTIF:MA0004.1:Arnt & 1467.4838 & 1115.6742 & 1484.3750 & 1169.8251 & 1443.919 \\\\\n", "\tMOTIF:MA0006.1:Ahr::Arnt & 2659.1760 & 2060.2327 & 2693.0147 & 2099.1254 & 2819.080 \\\\\n", "\tMOTIF:MA0007.3:Ar & 146.4079 & 102.6694 & 119.4853 & 116.6181 & 124.624 \\\\\n", "\\end{tabular}\n" ], "text/markdown": [ "\n", "| | BM1077-CLP-Frozen-160106-13 | BM1077-CLP-Frozen-160106-14 | BM1077-CLP-Frozen-160106-2 | BM1077-CLP-Frozen-160106-21 | BM1077-CLP-Frozen-160106-27 |\n", "|---|---|---|---|---|---|\n", "| MOTIF:MA0002.2:RUNX1 | 1872.6592 | 1327.8576 | 1626.8382 | 1472.3032 | 2131.500 |\n", "| MOTIF:MA0003.3:TFAP2A | 3489.9557 | 2505.1335 | 3281.2500 | 2736.8805 | 3790.288 |\n", "| MOTIF:MA0004.1:Arnt | 1467.4838 | 1115.6742 | 1484.3750 | 1169.8251 | 1443.919 |\n", "| MOTIF:MA0006.1:Ahr::Arnt | 2659.1760 | 2060.2327 | 2693.0147 | 2099.1254 | 2819.080 |\n", "| MOTIF:MA0007.3:Ar | 146.4079 | 102.6694 | 119.4853 | 116.6181 | 124.624 |\n", "\n" ], "text/plain": [ " BM1077-CLP-Frozen-160106-13\n", "MOTIF:MA0002.2:RUNX1 1872.6592 \n", "MOTIF:MA0003.3:TFAP2A 3489.9557 \n", "MOTIF:MA0004.1:Arnt 1467.4838 \n", "MOTIF:MA0006.1:Ahr::Arnt 2659.1760 \n", "MOTIF:MA0007.3:Ar 146.4079 \n", " BM1077-CLP-Frozen-160106-14 BM1077-CLP-Frozen-160106-2\n", "MOTIF:MA0002.2:RUNX1 1327.8576 1626.8382 \n", "MOTIF:MA0003.3:TFAP2A 2505.1335 3281.2500 \n", "MOTIF:MA0004.1:Arnt 1115.6742 1484.3750 \n", "MOTIF:MA0006.1:Ahr::Arnt 2060.2327 2693.0147 \n", "MOTIF:MA0007.3:Ar 102.6694 119.4853 \n", " BM1077-CLP-Frozen-160106-21\n", "MOTIF:MA0002.2:RUNX1 1472.3032 \n", "MOTIF:MA0003.3:TFAP2A 
2736.8805 \n", "MOTIF:MA0004.1:Arnt 1169.8251 \n", "MOTIF:MA0006.1:Ahr::Arnt 2099.1254 \n", "MOTIF:MA0007.3:Ar 116.6181 \n", " BM1077-CLP-Frozen-160106-27\n", "MOTIF:MA0002.2:RUNX1 2131.500 \n", "MOTIF:MA0003.3:TFAP2A 3790.288 \n", "MOTIF:MA0004.1:Arnt 1443.919 \n", "MOTIF:MA0006.1:Ahr::Arnt 2819.080 \n", "MOTIF:MA0007.3:Ar 124.624 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dim(df_out)\n", "df_out[1:5,1:5]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "saveRDS(df_out, file = '../../output/feature_matrices/FM_SCRAT_buenrostro2018_motifs.rds')" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "R version 3.5.1 (2018-07-02)\n", "Platform: x86_64-conda_cos6-linux-gnu (64-bit)\n", "Running under: CentOS Linux 7 (Core)\n", "\n", "Matrix products: default\n", "BLAS/LAPACK: /data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_SCRAT/lib/R/lib/libRblas.so\n", "\n", "locale:\n", " [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C \n", " [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 \n", " [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 \n", " [7] LC_PAPER=en_US.UTF-8 LC_NAME=C \n", " [9] LC_ADDRESS=C LC_TELEPHONE=C \n", "[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C \n", "\n", "attached base packages:\n", "[1] stats4 parallel stats graphics grDevices utils datasets \n", "[8] methods base \n", "\n", "other attached packages:\n", " [1] SCRAT_0.99.0 SCRATdatahg19_0.99.1 \n", " [3] GenomicAlignments_1.18.1 Rsamtools_1.34.0 \n", " [5] Biostrings_2.50.2 XVector_0.22.0 \n", " [7] SummarizedExperiment_1.12.0 DelayedArray_0.8.0 \n", " [9] BiocParallel_1.16.6 matrixStats_0.54.0 \n", "[11] Biobase_2.42.0 GenomicRanges_1.34.0 \n", "[13] GenomeInfoDb_1.18.1 IRanges_2.16.0 \n", "[15] S4Vectors_0.20.1 BiocGenerics_0.28.0 \n", "[17] usethis_1.5.0 devtools_2.0.2 \n", "\n", "loaded via a namespace (and not attached):\n", " [1] tsne_0.1-3 bitops_1.0-6 fs_1.2.7 
\n", " [4] RColorBrewer_1.1-2 rprojroot_1.3-2 repr_0.19.2 \n", " [7] tools_3.5.1 backports_1.1.4 R6_2.4.0 \n", "[10] DT_0.5 KernSmooth_2.23-15 lazyeval_0.2.2 \n", "[13] colorspace_1.4-1 withr_2.1.2 tidyselect_0.2.5 \n", "[16] prettyunits_1.0.2 processx_3.3.0 compiler_3.5.1 \n", "[19] cli_1.1.0 desc_1.2.0 caTools_1.17.1.2 \n", "[22] scales_1.0.0 callr_3.2.0 pbdZMQ_0.3-3 \n", "[25] stringr_1.4.0 digest_0.6.18 shinyBS_0.61 \n", "[28] scatterD3_0.9 dbscan_1.1-3 base64enc_0.1-3 \n", "[31] pkgconfig_2.0.2 htmltools_0.3.6 sessioninfo_1.1.1 \n", "[34] htmlwidgets_1.3 rlang_0.3.4 shiny_1.3.2 \n", "[37] jsonlite_1.6 mclust_5.4.3 gtools_3.8.1 \n", "[40] dplyr_0.8.0.1 RCurl_1.95-4.11 magrittr_1.5 \n", "[43] GenomeInfoDbData_1.2.0 Matrix_1.2-17 Rcpp_1.0.1 \n", "[46] IRkernel_0.8.15 munsell_0.5.0 stringi_1.4.3 \n", "[49] zlibbioc_1.28.0 pkgbuild_1.0.3 gplots_3.0.1.1 \n", "[52] plyr_1.8.4 grid_3.5.1 gdata_2.18.0 \n", "[55] promises_1.0.1 crayon_1.3.4 lattice_0.20-38 \n", "[58] IRdisplay_0.7.0 ps_1.3.0 pillar_1.3.1 \n", "[61] uuid_0.1-2 reshape2_1.4.3 pkgload_1.0.2 \n", "[64] glue_1.3.1 evaluate_0.13 remotes_2.0.4 \n", "[67] httpuv_1.5.1 gtable_0.3.0 purrr_0.3.2 \n", "[70] assertthat_0.2.1 ggplot2_3.1.1 mime_0.6 \n", "[73] xtable_1.8-4 later_0.8.0 tibble_2.1.1 \n", "[76] pheatmap_1.0.12 memoise_1.1.0 ellipse_0.4.1 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sessionInfo()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "save.image(file = 'SCRAT_buenrostro2018.RData')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "R [conda env:ATACseq_SCRAT]", "language": "R", "name": "conda-env-ATACseq_SCRAT-r" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 2 }