{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Installation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`devtools::install_github(\"zji90/SCRATdatamm9\")`  \n",
    "`source(\"https://raw.githubusercontent.com/zji90/SCRATdata/master/installcode.R\")`  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  Import packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading required package: BiocGenerics\n",
      "Loading required package: parallel\n",
      "\n",
      "Attaching package: ‘BiocGenerics’\n",
      "\n",
      "The following objects are masked from ‘package:parallel’:\n",
      "\n",
      "    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\n",
      "    clusterExport, clusterMap, parApply, parCapply, parLapply,\n",
      "    parLapplyLB, parRapply, parSapply, parSapplyLB\n",
      "\n",
      "The following objects are masked from ‘package:stats’:\n",
      "\n",
      "    IQR, mad, sd, var, xtabs\n",
      "\n",
      "The following objects are masked from ‘package:base’:\n",
      "\n",
      "    anyDuplicated, append, as.data.frame, basename, cbind, colMeans,\n",
      "    colnames, colSums, dirname, do.call, duplicated, eval, evalq,\n",
      "    Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,\n",
      "    lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,\n",
      "    pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames,\n",
      "    rowSums, sapply, setdiff, sort, table, tapply, union, unique,\n",
      "    unsplit, which, which.max, which.min\n",
      "\n",
      "Loading required package: S4Vectors\n",
      "Loading required package: stats4\n",
      "\n",
      "Attaching package: ‘S4Vectors’\n",
      "\n",
      "The following object is masked from ‘package:base’:\n",
      "\n",
      "    expand.grid\n",
      "\n",
      "Loading required package: IRanges\n",
      "Loading required package: GenomeInfoDb\n",
      "Loading required package: GenomicRanges\n",
      "Loading required package: SummarizedExperiment\n",
      "Loading required package: Biobase\n",
      "Welcome to Bioconductor\n",
      "\n",
      "    Vignettes contain introductory material; view with\n",
      "    'browseVignettes()'. To cite Bioconductor, see\n",
      "    'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'.\n",
      "\n",
      "Loading required package: DelayedArray\n",
      "Loading required package: matrixStats\n",
      "\n",
      "Attaching package: ‘matrixStats’\n",
      "\n",
      "The following objects are masked from ‘package:Biobase’:\n",
      "\n",
      "    anyMissing, rowMedians\n",
      "\n",
      "Loading required package: BiocParallel\n",
      "\n",
      "Attaching package: ‘DelayedArray’\n",
      "\n",
      "The following objects are masked from ‘package:matrixStats’:\n",
      "\n",
      "    colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges\n",
      "\n",
      "The following objects are masked from ‘package:base’:\n",
      "\n",
      "    aperm, apply\n",
      "\n",
      "Loading required package: Biostrings\n",
      "Loading required package: XVector\n",
      "\n",
      "Attaching package: ‘Biostrings’\n",
      "\n",
      "The following object is masked from ‘package:DelayedArray’:\n",
      "\n",
      "    type\n",
      "\n",
      "The following object is masked from ‘package:base’:\n",
      "\n",
      "    strsplit\n",
      "\n",
      "Loading required package: Rsamtools\n",
      "Warning message:\n",
      "“replacing previous import ‘DT::dataTableOutput’ by ‘shiny::dataTableOutput’ when loading ‘SCRAT’”Warning message:\n",
      "“replacing previous import ‘DT::renderDataTable’ by ‘shiny::renderDataTable’ when loading ‘SCRAT’”Warning message:\n",
      "“replacing previous import ‘mclust::em’ by ‘shiny::em’ when loading ‘SCRAT’”"
     ]
    }
   ],
   "source": [
    "library(devtools)\n",
    "library(GenomicAlignments)\n",
    "library(Rsamtools)\n",
    "library(SCRATdatahg19)\n",
    "library(SCRAT)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Obtain Feature Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "start_time = Sys.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "metadata <- read.table('../../input/metadata.tsv',\n",
    "                         header = TRUE,\n",
    "                         stringsAsFactors=FALSE,quote=\"\",row.names=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "SCRATsummary <- function (dir = \"\", genome, bamfile = NULL, singlepair = \"automated\", \n",
    "    removeblacklist = T, log2transform = T, adjustlen = T, featurelist = c(\"GENE\", \n",
    "        \"ENCL\", \"MOTIF_TRANSFAC\", \"MOTIF_JASPAR\", \"GSEA\"), customfeature = NULL, \n",
    "    Genestarttype = \"TSSup\", Geneendtype = \"TSSdown\", Genestartbp = 3000, \n",
    "    Geneendbp = 1000, ENCLclunum = 2000, Motifflank = 100, GSEAterm = \"c5.bp\", \n",
    "    GSEAstarttype = \"TSSup\", GSEAendtype = \"TSSdown\", GSEAstartbp = 3000, \n",
    "    GSEAendbp = 1000) \n",
    "{\n",
    "    if (is.null(bamfile)) {\n",
    "        bamfile <- list.files(dir, pattern = \".bam$\")\n",
    "    }\n",
    "    datapath <- system.file(\"extdata\", package = paste0(\"SCRATdata\", \n",
    "        genome))\n",
    "    bamdata <- list()\n",
    "    for (i in bamfile) {\n",
    "        filepath <- file.path(dir, i)\n",
    "        if (singlepair == \"automated\") {\n",
    "            bamfile <- BamFile(filepath)\n",
    "            tmpsingle <- readGAlignments(bamfile)\n",
    "            tmppair <- readGAlignmentPairs(bamfile)\n",
    "            pairendtf <- testPairedEndBam(bamfile)\n",
    "            if (pairendtf) {\n",
    "                tmp <- tmppair\n",
    "                startpos <- pmin(start(first(tmp)), start(last(tmp)))\n",
    "                endpos <- pmax(end(first(tmp)), end(last(tmp)))\n",
    "                id <- which(!is.na(as.character(seqnames(tmp))))\n",
    "                tmp <- GRanges(seqnames=as.character(seqnames(tmp))[id],IRanges(start=startpos[id],end=endpos[id]))\n",
    "            }\n",
    "            else {\n",
    "                tmp <- GRanges(tmpsingle)\n",
    "            }\n",
    "        }\n",
    "        else if (singlepair == \"single\") {\n",
    "            tmp <- GRanges(readGAlignments(filepath))\n",
    "        }\n",
    "        else if (singlepair == \"pair\") {\n",
    "            tmp <- readGAlignmentPairs(filepath)\n",
    "            startpos <- pmin(start(first(tmp)), start(last(tmp)))\n",
    "            endpos <- pmax(end(first(tmp)), end(last(tmp)))\n",
    "            id <- which(!is.na(as.character(seqnames(tmp))))\n",
    "            tmp <- GRanges(seqnames=as.character(seqnames(tmp))[id],IRanges(start=startpos[id],end=endpos[id]))\n",
    "        }\n",
    "        if (removeblacklist) {\n",
    "            load(paste0(datapath, \"/gr/blacklist.rda\"))\n",
    "            tmp <- tmp[-as.matrix(findOverlaps(tmp, gr))[, 1], \n",
    "                ]\n",
    "        }\n",
    "        bamdata[[i]] <- tmp\n",
    "    }\n",
    "    bamsummary <- sapply(bamdata, length)\n",
    "    allres <- NULL\n",
    "    datapath <- system.file(\"extdata\", package = paste0(\"SCRATdata\", \n",
    "        genome))\n",
    "    if (\"GENE\" %in% featurelist) {\n",
    "        print(\"Processing GENE features\")\n",
    "        load(paste0(datapath, \"/gr/generegion.rda\"))\n",
    "        if (Genestarttype == \"TSSup\") {\n",
    "            grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                start(gr) - as.numeric(Genestartbp), end(gr) + \n",
    "                  as.numeric(Genestartbp))\n",
    "        }\n",
    "        else if (Genestarttype == \"TSSdown\") {\n",
    "            grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                start(gr) + as.numeric(Genestartbp), end(gr) - \n",
    "                  as.numeric(Genestartbp))\n",
    "        }\n",
    "        else if (Genestarttype == \"TESup\") {\n",
    "            grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                end(gr) - as.numeric(Genestartbp), start(gr) + \n",
    "                  as.numeric(Genestartbp))\n",
    "        }\n",
    "        else if (Genestarttype == \"TESdown\") {\n",
    "            grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                end(gr) + as.numeric(Genestartbp), start(gr) - \n",
    "                  as.numeric(Genestartbp))\n",
    "        }\n",
    "        if (Geneendtype == \"TSSup\") {\n",
    "            grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                start(gr) - as.numeric(Geneendbp), end(gr) + \n",
    "                  as.numeric(Geneendbp))\n",
    "        }\n",
    "        else if (Geneendtype == \"TSSdown\") {\n",
    "            grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                start(gr) + as.numeric(Geneendbp), end(gr) - \n",
    "                  as.numeric(Geneendbp))\n",
    "        }\n",
    "        else if (Geneendtype == \"TESup\") {\n",
    "            grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                end(gr) - as.numeric(Geneendbp), start(gr) + \n",
    "                  as.numeric(Geneendbp))\n",
    "        }\n",
    "        else if (Geneendtype == \"TESdown\") {\n",
    "            grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                end(gr) + as.numeric(Geneendbp), start(gr) - \n",
    "                  as.numeric(Geneendbp))\n",
    "        }\n",
    "        ngr <- names(gr)\n",
    "        gr <- GRanges(seqnames = seqnames(gr), IRanges(start = pmin(grstart, \n",
    "            grend), end = pmax(grstart, grend)))\n",
    "        names(gr) <- ngr\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- end(gr) - start(gr) + 1\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "    }\n",
    "    if (\"ENCL\" %in% featurelist) {\n",
    "        print(\"Processing ENCL features\")\n",
    "        load(paste0(datapath, \"/gr/ENCL\", ENCLclunum, \".rda\"))\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
    "                1))\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "    }\n",
    "    if (\"MOTIF_TRANSFAC\" %in% featurelist) {\n",
    "        print(\"Processing MOTIF_TRANSFAC features\")\n",
    "        load(paste0(datapath, \"/gr/transfac1.rda\"))\n",
    "        gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
    "                1))\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "        load(paste0(datapath, \"/gr/transfac2.rda\"))\n",
    "        gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
    "                1))\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "        if (genome %in% c(\"hg19\", \"hg38\")) {\n",
    "            load(paste0(datapath, \"/gr/transfac3.rda\"))\n",
    "            gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
    "            tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "                i))\n",
    "            tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "            if (log2transform) {\n",
    "                tmp <- log2(tmp + 1)\n",
    "            }\n",
    "            if (adjustlen) {\n",
    "                grrange <- sapply(gr, function(i) sum(end(i) - \n",
    "                  start(i) + 1))\n",
    "                tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "            }\n",
    "            tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "            allres <- rbind(allres, tmp)\n",
    "        }\n",
    "    }\n",
    "    if (\"MOTIF_JASPAR\" %in% featurelist) {\n",
    "        print(\"Processing MOTIF_JASPAR features\")\n",
    "        load(paste0(datapath, \"/gr/jaspar1.rda\"))\n",
    "        gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
    "                1))\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "        load(paste0(datapath, \"/gr/jaspar2.rda\"))\n",
    "        gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
    "                1))\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "    }\n",
    "    if (\"GSEA\" %in% featurelist) {\n",
    "        print(\"Processing GSEA features\")\n",
    "        for (i in GSEAterm) {\n",
    "            load(paste0(datapath, \"/gr/GSEA\", i, \".rda\"))\n",
    "            allgr <- gr\n",
    "            for (sgrn in names(allgr)) {\n",
    "                gr <- allgr[[sgrn]]\n",
    "                if (GSEAstarttype == \"TSSup\") {\n",
    "                  grstart <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", start(gr) - as.numeric(GSEAstartbp), \n",
    "                    end(gr) + as.numeric(GSEAstartbp))\n",
    "                }\n",
    "                else if (GSEAstarttype == \"TSSdown\") {\n",
    "                  grstart <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", start(gr) + as.numeric(GSEAstartbp), \n",
    "                    end(gr) - as.numeric(GSEAstartbp))\n",
    "                }\n",
    "                else if (GSEAstarttype == \"TESup\") {\n",
    "                  grstart <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", end(gr) - as.numeric(GSEAstartbp), start(gr) + \n",
    "                    as.numeric(GSEAstartbp))\n",
    "                }\n",
    "                else if (GSEAstarttype == \"TESdown\") {\n",
    "                  grstart <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", end(gr) + as.numeric(GSEAstartbp), start(gr) - \n",
    "                    as.numeric(GSEAstartbp))\n",
    "                }\n",
    "                if (GSEAendtype == \"TSSup\") {\n",
    "                  grend <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", start(gr) - as.numeric(GSEAendbp), end(gr) + \n",
    "                    as.numeric(GSEAendbp))\n",
    "                }\n",
    "                else if (GSEAendtype == \"TSSdown\") {\n",
    "                  grend <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", start(gr) + as.numeric(GSEAendbp), end(gr) - \n",
    "                    as.numeric(GSEAendbp))\n",
    "                }\n",
    "                else if (GSEAendtype == \"TESup\") {\n",
    "                  grend <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", end(gr) - as.numeric(GSEAendbp), start(gr) + \n",
    "                    as.numeric(GSEAendbp))\n",
    "                }\n",
    "                else if (GSEAendtype == \"TESdown\") {\n",
    "                  grend <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", end(gr) + as.numeric(GSEAendbp), start(gr) - \n",
    "                    as.numeric(GSEAendbp))\n",
    "                }\n",
    "                ngr <- names(gr)\n",
    "                gr <- GRanges(seqnames = seqnames(gr), IRanges(start = pmin(grstart, \n",
    "                  grend), end = pmax(grstart, grend)))\n",
    "                names(gr) <- ngr\n",
    "                allgr[[sgrn]] <- gr\n",
    "            }\n",
    "            gr <- allgr\n",
    "            tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "                i))\n",
    "            tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "            if (log2transform) {\n",
    "                tmp <- log2(tmp + 1)\n",
    "            }\n",
    "            if (adjustlen) {\n",
    "                grrange <- sapply(gr, function(i) sum(end(i) - \n",
    "                  start(i) + 1))\n",
    "                tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "            }\n",
    "            tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "            allres <- rbind(allres, tmp)\n",
    "        }\n",
    "    }\n",
    "    if (\"Custom\" %in% featurelist) {\n",
    "        print(\"Processing custom features\")\n",
    "        gr <- read.table(customfeature, as.is = T, sep = \"\\t\")\n",
    "        gr <- GRanges(seqnames = gr[, 1], IRanges(start = gr[, \n",
    "            2], end = gr[, 3]))\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- end(gr) - start(gr) + 1\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "    }\n",
    "    allres\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_out <- SCRATsummary(dir = \"../../input/sc-bams_nodup/\", \n",
    "                               genome = \"mm9\",\n",
    "                               featurelist=\"MOTIF_JASPAR\",\n",
    "                               log2transform = FALSE, adjustlen = FALSE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "end_time <- Sys.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Time difference of 1.188708 days"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "end_time - start_time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<ol class=list-inline>\n",
       "\t<li>519</li>\n",
       "\t<li>12178</li>\n",
       "</ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item 519\n",
       "\\item 12178\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. 519\n",
       "2. 12178\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1]   519 12178"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead><tr><th></th><th scope=col>BoneMarrow_62016.AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT.header.bam</th><th scope=col>BoneMarrow_62016.AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC.header.bam</th><th scope=col>BoneMarrow_62016.AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC.header.bam</th><th scope=col>BoneMarrow_62016.AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC.header.bam</th><th scope=col>BoneMarrow_62016.AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC.header.bam</th></tr></thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>MOTIF:MA0002.2:RUNX1</th><td>779.67587 </td><td>786.52359 </td><td>679.96058 </td><td>584.14298 </td><td> 832.58595</td></tr>\n",
       "\t<tr><th scope=row>MOTIF:MA0003.3:TFAP2A</th><td>760.69499 </td><td>739.16432 </td><td>593.73370 </td><td>680.10076 </td><td>1234.67862</td></tr>\n",
       "\t<tr><th scope=row>MOTIF:MA0004.1:Arnt</th><td>508.10337 </td><td>528.14579 </td><td>408.81688 </td><td>394.62636 </td><td> 689.08819</td></tr>\n",
       "\t<tr><th scope=row>MOTIF:MA0006.1:Ahr::Arnt</th><td>727.11345 </td><td>816.49781 </td><td>618.65979 </td><td>638.11923 </td><td>1053.81166</td></tr>\n",
       "\t<tr><th scope=row>MOTIF:MA0007.3:Ar</th><td> 54.02249 </td><td> 52.15515 </td><td> 48.25807 </td><td> 56.37519 </td><td>  62.78027</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "\\begin{tabular}{r|lllll}\n",
       "  & BoneMarrow\\_62016.AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT.header.bam & BoneMarrow\\_62016.AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC.header.bam & BoneMarrow\\_62016.AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC.header.bam & BoneMarrow\\_62016.AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC.header.bam & BoneMarrow\\_62016.AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC.header.bam\\\\\n",
       "\\hline\n",
       "\tMOTIF:MA0002.2:RUNX1 & 779.67587  & 786.52359  & 679.96058  & 584.14298  &  832.58595\\\\\n",
       "\tMOTIF:MA0003.3:TFAP2A & 760.69499  & 739.16432  & 593.73370  & 680.10076  & 1234.67862\\\\\n",
       "\tMOTIF:MA0004.1:Arnt & 508.10337  & 528.14579  & 408.81688  & 394.62636  &  689.08819\\\\\n",
       "\tMOTIF:MA0006.1:Ahr::Arnt & 727.11345  & 816.49781  & 618.65979  & 638.11923  & 1053.81166\\\\\n",
       "\tMOTIF:MA0007.3:Ar &  54.02249  &  52.15515  &  48.25807  &  56.37519  &   62.78027\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "| <!--/--> | BoneMarrow_62016.AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT.header.bam | BoneMarrow_62016.AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC.header.bam | BoneMarrow_62016.AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC.header.bam | BoneMarrow_62016.AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC.header.bam | BoneMarrow_62016.AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC.header.bam |\n",
       "|---|---|---|---|---|---|\n",
       "| MOTIF:MA0002.2:RUNX1 | 779.67587  | 786.52359  | 679.96058  | 584.14298  |  832.58595 |\n",
       "| MOTIF:MA0003.3:TFAP2A | 760.69499  | 739.16432  | 593.73370  | 680.10076  | 1234.67862 |\n",
       "| MOTIF:MA0004.1:Arnt | 508.10337  | 528.14579  | 408.81688  | 394.62636  |  689.08819 |\n",
       "| MOTIF:MA0006.1:Ahr::Arnt | 727.11345  | 816.49781  | 618.65979  | 638.11923  | 1053.81166 |\n",
       "| MOTIF:MA0007.3:Ar |  54.02249  |  52.15515  |  48.25807  |  56.37519  |   62.78027 |\n",
       "\n"
      ],
      "text/plain": [
       "                         BoneMarrow_62016.AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT.header.bam\n",
       "MOTIF:MA0002.2:RUNX1     779.67587                                                       \n",
       "MOTIF:MA0003.3:TFAP2A    760.69499                                                       \n",
       "MOTIF:MA0004.1:Arnt      508.10337                                                       \n",
       "MOTIF:MA0006.1:Ahr::Arnt 727.11345                                                       \n",
       "MOTIF:MA0007.3:Ar         54.02249                                                       \n",
       "                         BoneMarrow_62016.AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC.header.bam\n",
       "MOTIF:MA0002.2:RUNX1     786.52359                                                       \n",
       "MOTIF:MA0003.3:TFAP2A    739.16432                                                       \n",
       "MOTIF:MA0004.1:Arnt      528.14579                                                       \n",
       "MOTIF:MA0006.1:Ahr::Arnt 816.49781                                                       \n",
       "MOTIF:MA0007.3:Ar         52.15515                                                       \n",
       "                         BoneMarrow_62016.AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC.header.bam\n",
       "MOTIF:MA0002.2:RUNX1     679.96058                                                       \n",
       "MOTIF:MA0003.3:TFAP2A    593.73370                                                       \n",
       "MOTIF:MA0004.1:Arnt      408.81688                                                       \n",
       "MOTIF:MA0006.1:Ahr::Arnt 618.65979                                                       \n",
       "MOTIF:MA0007.3:Ar         48.25807                                                       \n",
       "                         BoneMarrow_62016.AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC.header.bam\n",
       "MOTIF:MA0002.2:RUNX1     584.14298                                                       \n",
       "MOTIF:MA0003.3:TFAP2A    680.10076                                                       \n",
       "MOTIF:MA0004.1:Arnt      394.62636                                                       \n",
       "MOTIF:MA0006.1:Ahr::Arnt 638.11923                                                       \n",
       "MOTIF:MA0007.3:Ar         56.37519                                                       \n",
       "                         BoneMarrow_62016.AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC.header.bam\n",
       "MOTIF:MA0002.2:RUNX1      832.58595                                                      \n",
       "MOTIF:MA0003.3:TFAP2A    1234.67862                                                      \n",
       "MOTIF:MA0004.1:Arnt       689.08819                                                      \n",
       "MOTIF:MA0006.1:Ahr::Arnt 1053.81166                                                      \n",
       "MOTIF:MA0007.3:Ar          62.78027                                                      "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "dim(df_out)\n",
    "df_out[1:5,1:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<ol class=list-inline>\n",
       "\t<li>'AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT'</li>\n",
       "\t<li>'AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC'</li>\n",
       "\t<li>'AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC'</li>\n",
       "\t<li>'AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC'</li>\n",
       "\t<li>'AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC'</li>\n",
       "\t<li>'AGCGATAGAGTTGAATCAAAGCTAGGTTCCTATCCT'</li>\n",
       "</ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item 'AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT'\n",
       "\\item 'AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC'\n",
       "\\item 'AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC'\n",
       "\\item 'AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC'\n",
       "\\item 'AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC'\n",
       "\\item 'AGCGATAGAGTTGAATCAAAGCTAGGTTCCTATCCT'\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. 'AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT'\n",
       "2. 'AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC'\n",
       "3. 'AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC'\n",
       "4. 'AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC'\n",
       "5. 'AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC'\n",
       "6. 'AGCGATAGAGTTGAATCAAAGCTAGGTTCCTATCCT'\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1] \"AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT\"\n",
       "[2] \"AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC\"\n",
       "[3] \"AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC\"\n",
       "[4] \"AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC\"\n",
       "[5] \"AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC\"\n",
       "[6] \"AGCGATAGAGTTGAATCAAAGCTAGGTTCCTATCCT\""
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "head(sapply(strsplit(colnames(df_out), \"\\\\.\"),'[',2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<ol class=list-inline>\n",
       "\t<li>519</li>\n",
       "\t<li>12178</li>\n",
       "</ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item 519\n",
       "\\item 12178\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. 519\n",
       "2. 12178\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1]   519 12178"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead><tr><th></th><th scope=col>AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT</th><th scope=col>AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC</th><th scope=col>AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC</th><th scope=col>AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC</th><th scope=col>AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC</th></tr></thead>\n",
       "<tbody>\n",
       "\t<tr><th scope=row>MOTIF:MA0002.2:RUNX1</th><td>779.67587 </td><td>786.52359 </td><td>679.96058 </td><td>584.14298 </td><td> 832.58595</td></tr>\n",
       "\t<tr><th scope=row>MOTIF:MA0003.3:TFAP2A</th><td>760.69499 </td><td>739.16432 </td><td>593.73370 </td><td>680.10076 </td><td>1234.67862</td></tr>\n",
       "\t<tr><th scope=row>MOTIF:MA0004.1:Arnt</th><td>508.10337 </td><td>528.14579 </td><td>408.81688 </td><td>394.62636 </td><td> 689.08819</td></tr>\n",
       "\t<tr><th scope=row>MOTIF:MA0006.1:Ahr::Arnt</th><td>727.11345 </td><td>816.49781 </td><td>618.65979 </td><td>638.11923 </td><td>1053.81166</td></tr>\n",
       "\t<tr><th scope=row>MOTIF:MA0007.3:Ar</th><td> 54.02249 </td><td> 52.15515 </td><td> 48.25807 </td><td> 56.37519 </td><td>  62.78027</td></tr>\n",
       "</tbody>\n",
       "</table>\n"
      ],
      "text/latex": [
       "\\begin{tabular}{r|lllll}\n",
       "  & AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT & AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC & AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC & AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC & AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC\\\\\n",
       "\\hline\n",
       "\tMOTIF:MA0002.2:RUNX1 & 779.67587  & 786.52359  & 679.96058  & 584.14298  &  832.58595\\\\\n",
       "\tMOTIF:MA0003.3:TFAP2A & 760.69499  & 739.16432  & 593.73370  & 680.10076  & 1234.67862\\\\\n",
       "\tMOTIF:MA0004.1:Arnt & 508.10337  & 528.14579  & 408.81688  & 394.62636  &  689.08819\\\\\n",
       "\tMOTIF:MA0006.1:Ahr::Arnt & 727.11345  & 816.49781  & 618.65979  & 638.11923  & 1053.81166\\\\\n",
       "\tMOTIF:MA0007.3:Ar &  54.02249  &  52.15515  &  48.25807  &  56.37519  &   62.78027\\\\\n",
       "\\end{tabular}\n"
      ],
      "text/markdown": [
       "\n",
       "| <!--/--> | AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT | AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC | AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC | AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC | AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC |\n",
       "|---|---|---|---|---|---|\n",
       "| MOTIF:MA0002.2:RUNX1 | 779.67587  | 786.52359  | 679.96058  | 584.14298  |  832.58595 |\n",
       "| MOTIF:MA0003.3:TFAP2A | 760.69499  | 739.16432  | 593.73370  | 680.10076  | 1234.67862 |\n",
       "| MOTIF:MA0004.1:Arnt | 508.10337  | 528.14579  | 408.81688  | 394.62636  |  689.08819 |\n",
       "| MOTIF:MA0006.1:Ahr::Arnt | 727.11345  | 816.49781  | 618.65979  | 638.11923  | 1053.81166 |\n",
       "| MOTIF:MA0007.3:Ar |  54.02249  |  52.15515  |  48.25807  |  56.37519  |   62.78027 |\n",
       "\n"
      ],
      "text/plain": [
       "                         AGCGATAGAATACGATAATGGCAGCTCGCAGGACGT\n",
       "MOTIF:MA0002.2:RUNX1     779.67587                           \n",
       "MOTIF:MA0003.3:TFAP2A    760.69499                           \n",
       "MOTIF:MA0004.1:Arnt      508.10337                           \n",
       "MOTIF:MA0006.1:Ahr::Arnt 727.11345                           \n",
       "MOTIF:MA0007.3:Ar         54.02249                           \n",
       "                         AGCGATAGAATATTACTTTCCGCGGACTGTACTGAC\n",
       "MOTIF:MA0002.2:RUNX1     786.52359                           \n",
       "MOTIF:MA0003.3:TFAP2A    739.16432                           \n",
       "MOTIF:MA0004.1:Arnt      528.14579                           \n",
       "MOTIF:MA0006.1:Ahr::Arnt 816.49781                           \n",
       "MOTIF:MA0007.3:Ar         52.15515                           \n",
       "                         AGCGATAGACCAGGCGCATGGCAGCTCGATAGAGGC\n",
       "MOTIF:MA0002.2:RUNX1     679.96058                           \n",
       "MOTIF:MA0003.3:TFAP2A    593.73370                           \n",
       "MOTIF:MA0004.1:Arnt      408.81688                           \n",
       "MOTIF:MA0006.1:Ahr::Arnt 618.65979                           \n",
       "MOTIF:MA0007.3:Ar         48.25807                           \n",
       "                         AGCGATAGAGATTACGTTGCGCAATGACGTACTGAC\n",
       "MOTIF:MA0002.2:RUNX1     584.14298                           \n",
       "MOTIF:MA0003.3:TFAP2A    680.10076                           \n",
       "MOTIF:MA0004.1:Arnt      394.62636                           \n",
       "MOTIF:MA0006.1:Ahr::Arnt 638.11923                           \n",
       "MOTIF:MA0007.3:Ar         56.37519                           \n",
       "                         AGCGATAGAGGTCAGCTTGGAGTTGCGTGTACTGAC\n",
       "MOTIF:MA0002.2:RUNX1      832.58595                          \n",
       "MOTIF:MA0003.3:TFAP2A    1234.67862                          \n",
       "MOTIF:MA0004.1:Arnt       689.08819                          \n",
       "MOTIF:MA0006.1:Ahr::Arnt 1053.81166                          \n",
       "MOTIF:MA0007.3:Ar          62.78027                          "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "colnames(df_out) = sapply(strsplit(colnames(df_out), \"\\\\.\"),'[',2)\n",
    "dim(df_out)\n",
    "df_out[1:5,1:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<ol class=list-inline>\n",
       "\t<li>519</li>\n",
       "\t<li>12178</li>\n",
       "</ol>\n"
      ],
      "text/latex": [
       "\\begin{enumerate*}\n",
       "\\item 519\n",
       "\\item 12178\n",
       "\\end{enumerate*}\n"
      ],
      "text/markdown": [
       "1. 519\n",
       "2. 12178\n",
       "\n",
       "\n"
      ],
      "text/plain": [
       "[1]   519 12178"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "if(! all(colnames(df_out) == rownames(metadata))){\n",
    "    df_out = df_out[,rownames(metadata)]\n",
    "    dim(df_out)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "saveRDS(df_out, file = '../../output/feature_matrices/FM_SCRAT_cusanovich2018subset_motifs.rds')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "R version 3.5.1 (2018-07-02)\n",
       "Platform: x86_64-conda_cos6-linux-gnu (64-bit)\n",
       "Running under: CentOS Linux 7 (Core)\n",
       "\n",
       "Matrix products: default\n",
       "BLAS/LAPACK: /data/pinello/SHARED_SOFTWARE/anaconda3/envs/ATACseq_SCRAT/lib/R/lib/libRblas.so\n",
       "\n",
       "locale:\n",
       " [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              \n",
       " [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    \n",
       " [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   \n",
       " [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 \n",
       " [9] LC_ADDRESS=C               LC_TELEPHONE=C            \n",
       "[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       \n",
       "\n",
       "attached base packages:\n",
       "[1] stats4    parallel  stats     graphics  grDevices utils     datasets \n",
       "[8] methods   base     \n",
       "\n",
       "other attached packages:\n",
       " [1] SCRAT_0.99.0                SCRATdatahg19_0.99.1       \n",
       " [3] GenomicAlignments_1.18.1    Rsamtools_1.34.0           \n",
       " [5] Biostrings_2.50.2           XVector_0.22.0             \n",
       " [7] SummarizedExperiment_1.12.0 DelayedArray_0.8.0         \n",
       " [9] BiocParallel_1.16.6         matrixStats_0.54.0         \n",
       "[11] Biobase_2.42.0              GenomicRanges_1.34.0       \n",
       "[13] GenomeInfoDb_1.18.1         IRanges_2.16.0             \n",
       "[15] S4Vectors_0.20.1            BiocGenerics_0.28.0        \n",
       "[17] usethis_1.5.0               devtools_2.0.2             \n",
       "\n",
       "loaded via a namespace (and not attached):\n",
       " [1] tsne_0.1-3             bitops_1.0-6           fs_1.2.7              \n",
       " [4] RColorBrewer_1.1-2     rprojroot_1.3-2        repr_0.19.2           \n",
       " [7] tools_3.5.1            backports_1.1.4        R6_2.4.0              \n",
       "[10] DT_0.5                 KernSmooth_2.23-15     lazyeval_0.2.2        \n",
       "[13] colorspace_1.4-1       withr_2.1.2            tidyselect_0.2.5      \n",
       "[16] prettyunits_1.0.2      processx_3.3.0         compiler_3.5.1        \n",
       "[19] cli_1.1.0              desc_1.2.0             caTools_1.17.1.2      \n",
       "[22] scales_1.0.0           callr_3.2.0            pbdZMQ_0.3-3          \n",
       "[25] stringr_1.4.0          digest_0.6.18          shinyBS_0.61          \n",
       "[28] scatterD3_0.9          dbscan_1.1-3           base64enc_0.1-3       \n",
       "[31] pkgconfig_2.0.2        htmltools_0.3.6        sessioninfo_1.1.1     \n",
       "[34] htmlwidgets_1.3        rlang_0.3.4            shiny_1.3.2           \n",
       "[37] jsonlite_1.6           mclust_5.4.3           gtools_3.8.1          \n",
       "[40] dplyr_0.8.0.1          RCurl_1.95-4.11        magrittr_1.5          \n",
       "[43] GenomeInfoDbData_1.2.0 Matrix_1.2-17          Rcpp_1.0.1            \n",
       "[46] IRkernel_0.8.15        munsell_0.5.0          stringi_1.4.3         \n",
       "[49] zlibbioc_1.28.0        pkgbuild_1.0.3         gplots_3.0.1.1        \n",
       "[52] plyr_1.8.4             grid_3.5.1             gdata_2.18.0          \n",
       "[55] promises_1.0.1         crayon_1.3.4           lattice_0.20-38       \n",
       "[58] IRdisplay_0.7.0        ps_1.3.0               pillar_1.3.1          \n",
       "[61] uuid_0.1-2             reshape2_1.4.3         pkgload_1.0.2         \n",
       "[64] glue_1.3.1             evaluate_0.13          remotes_2.0.4         \n",
       "[67] httpuv_1.5.1           gtable_0.3.0           purrr_0.3.2           \n",
       "[70] assertthat_0.2.1       ggplot2_3.1.1          mime_0.6              \n",
       "[73] xtable_1.8-4           later_0.8.0            tibble_2.1.1          \n",
       "[76] pheatmap_1.0.12        memoise_1.1.0          ellipse_0.4.1         "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sessionInfo()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "save.image(file = 'SCRAT_cusanovich2018subset.RData')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R [conda env:ATACseq_SCRAT]",
   "language": "R",
   "name": "conda-env-ATACseq_SCRAT-r"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}