{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Installation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`devtools::install_github(\"zji90/SCRATdatamm9\")`  \n",
    "`source(\"https://raw.githubusercontent.com/zji90/SCRATdata/master/installcode.R\")`  "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "###  Import packages"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading required package: BiocGenerics\n",
      "Loading required package: parallel\n",
      "\n",
      "Attaching package: ‘BiocGenerics’\n",
      "\n",
      "The following objects are masked from ‘package:parallel’:\n",
      "\n",
      "    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,\n",
      "    clusterExport, clusterMap, parApply, parCapply, parLapply,\n",
      "    parLapplyLB, parRapply, parSapply, parSapplyLB\n",
      "\n",
      "The following objects are masked from ‘package:stats’:\n",
      "\n",
      "    IQR, mad, sd, var, xtabs\n",
      "\n",
      "The following objects are masked from ‘package:base’:\n",
      "\n",
      "    anyDuplicated, append, as.data.frame, basename, cbind, colMeans,\n",
      "    colnames, colSums, dirname, do.call, duplicated, eval, evalq,\n",
      "    Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,\n",
      "    lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,\n",
      "    pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames,\n",
      "    rowSums, sapply, setdiff, sort, table, tapply, union, unique,\n",
      "    unsplit, which, which.max, which.min\n",
      "\n",
      "Loading required package: S4Vectors\n",
      "Loading required package: stats4\n",
      "\n",
      "Attaching package: ‘S4Vectors’\n",
      "\n",
      "The following object is masked from ‘package:base’:\n",
      "\n",
      "    expand.grid\n",
      "\n",
      "Loading required package: IRanges\n",
      "Loading required package: GenomeInfoDb\n",
      "Loading required package: GenomicRanges\n",
      "Loading required package: SummarizedExperiment\n",
      "Loading required package: Biobase\n",
      "Welcome to Bioconductor\n",
      "\n",
      "    Vignettes contain introductory material; view with\n",
      "    'browseVignettes()'. To cite Bioconductor, see\n",
      "    'citation(\"Biobase\")', and for packages 'citation(\"pkgname\")'.\n",
      "\n",
      "Loading required package: DelayedArray\n",
      "Loading required package: matrixStats\n",
      "\n",
      "Attaching package: ‘matrixStats’\n",
      "\n",
      "The following objects are masked from ‘package:Biobase’:\n",
      "\n",
      "    anyMissing, rowMedians\n",
      "\n",
      "Loading required package: BiocParallel\n",
      "\n",
      "Attaching package: ‘DelayedArray’\n",
      "\n",
      "The following objects are masked from ‘package:matrixStats’:\n",
      "\n",
      "    colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges\n",
      "\n",
      "The following objects are masked from ‘package:base’:\n",
      "\n",
      "    aperm, apply\n",
      "\n",
      "Loading required package: Biostrings\n",
      "Loading required package: XVector\n",
      "\n",
      "Attaching package: ‘Biostrings’\n",
      "\n",
      "The following object is masked from ‘package:DelayedArray’:\n",
      "\n",
      "    type\n",
      "\n",
      "The following object is masked from ‘package:base’:\n",
      "\n",
      "    strsplit\n",
      "\n",
      "Loading required package: Rsamtools\n",
      "Warning message:\n",
      "“replacing previous import ‘DT::dataTableOutput’ by ‘shiny::dataTableOutput’ when loading ‘SCRAT’”Warning message:\n",
      "“replacing previous import ‘DT::renderDataTable’ by ‘shiny::renderDataTable’ when loading ‘SCRAT’”Warning message:\n",
      "“replacing previous import ‘mclust::em’ by ‘shiny::em’ when loading ‘SCRAT’”"
     ]
    }
   ],
   "source": [
    "library(devtools)\n",
    "library(GenomicAlignments)\n",
    "library(Rsamtools)\n",
    "library(SCRATdatahg19)\n",
    "library(SCRAT)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Obtain Feature Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "start_time = Sys.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "metadata <- read.table('../input/metadata.tsv',\n",
    "                         header = TRUE,\n",
    "                         stringsAsFactors=FALSE,quote=\"\",row.names=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "SCRATsummary <- function (dir = \"\", genome, bamfile = NULL, singlepair = \"automated\", \n",
    "    removeblacklist = T, log2transform = T, adjustlen = T, featurelist = c(\"GENE\", \n",
    "        \"ENCL\", \"MOTIF_TRANSFAC\", \"MOTIF_JASPAR\", \"GSEA\"), customfeature = NULL, \n",
    "    Genestarttype = \"TSSup\", Geneendtype = \"TSSdown\", Genestartbp = 3000, \n",
    "    Geneendbp = 1000, ENCLclunum = 2000, Motifflank = 100, GSEAterm = \"c5.bp\", \n",
    "    GSEAstarttype = \"TSSup\", GSEAendtype = \"TSSdown\", GSEAstartbp = 3000, \n",
    "    GSEAendbp = 1000) \n",
    "{\n",
    "    if (is.null(bamfile)) {\n",
    "        bamfile <- list.files(dir, pattern = \".bam$\")\n",
    "    }\n",
    "    datapath <- system.file(\"extdata\", package = paste0(\"SCRATdata\", \n",
    "        genome))\n",
    "    bamdata <- list()\n",
    "    for (i in bamfile) {\n",
    "        filepath <- file.path(dir, i)\n",
    "        if (singlepair == \"automated\") {\n",
    "            bamfile <- BamFile(filepath)\n",
    "            tmpsingle <- readGAlignments(bamfile)\n",
    "            tmppair <- readGAlignmentPairs(bamfile)\n",
    "            pairendtf <- testPairedEndBam(bamfile)\n",
    "            if (pairendtf) {\n",
    "                tmp <- tmppair\n",
    "                startpos <- pmin(start(first(tmp)), start(last(tmp)))\n",
    "                endpos <- pmax(end(first(tmp)), end(last(tmp)))\n",
    "                id <- which(!is.na(as.character(seqnames(tmp))))\n",
    "                tmp <- GRanges(seqnames=as.character(seqnames(tmp))[id],IRanges(start=startpos[id],end=endpos[id]))\n",
    "            }\n",
    "            else {\n",
    "                tmp <- GRanges(tmpsingle)\n",
    "            }\n",
    "        }\n",
    "        else if (singlepair == \"single\") {\n",
    "            tmp <- GRanges(readGAlignments(filepath))\n",
    "        }\n",
    "        else if (singlepair == \"pair\") {\n",
    "            tmp <- readGAlignmentPairs(filepath)\n",
    "            startpos <- pmin(start(first(tmp)), start(last(tmp)))\n",
    "            endpos <- pmax(end(first(tmp)), end(last(tmp)))\n",
    "            id <- which(!is.na(as.character(seqnames(tmp))))\n",
    "            tmp <- GRanges(seqnames=as.character(seqnames(tmp))[id],IRanges(start=startpos[id],end=endpos[id]))\n",
    "        }\n",
    "        if (removeblacklist) {\n",
    "            load(paste0(datapath, \"/gr/blacklist.rda\"))\n",
    "            tmp <- tmp[-as.matrix(findOverlaps(tmp, gr))[, 1], \n",
    "                ]\n",
    "        }\n",
    "        bamdata[[i]] <- tmp\n",
    "    }\n",
    "    bamsummary <- sapply(bamdata, length)\n",
    "    allres <- NULL\n",
    "    datapath <- system.file(\"extdata\", package = paste0(\"SCRATdata\", \n",
    "        genome))\n",
    "    if (\"GENE\" %in% featurelist) {\n",
    "        print(\"Processing GENE features\")\n",
    "        load(paste0(datapath, \"/gr/generegion.rda\"))\n",
    "        if (Genestarttype == \"TSSup\") {\n",
    "            grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                start(gr) - as.numeric(Genestartbp), end(gr) + \n",
    "                  as.numeric(Genestartbp))\n",
    "        }\n",
    "        else if (Genestarttype == \"TSSdown\") {\n",
    "            grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                start(gr) + as.numeric(Genestartbp), end(gr) - \n",
    "                  as.numeric(Genestartbp))\n",
    "        }\n",
    "        else if (Genestarttype == \"TESup\") {\n",
    "            grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                end(gr) - as.numeric(Genestartbp), start(gr) + \n",
    "                  as.numeric(Genestartbp))\n",
    "        }\n",
    "        else if (Genestarttype == \"TESdown\") {\n",
    "            grstart <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                end(gr) + as.numeric(Genestartbp), start(gr) - \n",
    "                  as.numeric(Genestartbp))\n",
    "        }\n",
    "        if (Geneendtype == \"TSSup\") {\n",
    "            grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                start(gr) - as.numeric(Geneendbp), end(gr) + \n",
    "                  as.numeric(Geneendbp))\n",
    "        }\n",
    "        else if (Geneendtype == \"TSSdown\") {\n",
    "            grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                start(gr) + as.numeric(Geneendbp), end(gr) - \n",
    "                  as.numeric(Geneendbp))\n",
    "        }\n",
    "        else if (Geneendtype == \"TESup\") {\n",
    "            grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                end(gr) - as.numeric(Geneendbp), start(gr) + \n",
    "                  as.numeric(Geneendbp))\n",
    "        }\n",
    "        else if (Geneendtype == \"TESdown\") {\n",
    "            grend <- ifelse(as.character(strand(gr)) == \"+\", \n",
    "                end(gr) + as.numeric(Geneendbp), start(gr) - \n",
    "                  as.numeric(Geneendbp))\n",
    "        }\n",
    "        ngr <- names(gr)\n",
    "        gr <- GRanges(seqnames = seqnames(gr), IRanges(start = pmin(grstart, \n",
    "            grend), end = pmax(grstart, grend)))\n",
    "        names(gr) <- ngr\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- end(gr) - start(gr) + 1\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "    }\n",
    "    if (\"ENCL\" %in% featurelist) {\n",
    "        print(\"Processing ENCL features\")\n",
    "        load(paste0(datapath, \"/gr/ENCL\", ENCLclunum, \".rda\"))\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
    "                1))\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "    }\n",
    "    if (\"MOTIF_TRANSFAC\" %in% featurelist) {\n",
    "        print(\"Processing MOTIF_TRANSFAC features\")\n",
    "        load(paste0(datapath, \"/gr/transfac1.rda\"))\n",
    "        gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
    "                1))\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "        load(paste0(datapath, \"/gr/transfac2.rda\"))\n",
    "        gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
    "                1))\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "        if (genome %in% c(\"hg19\", \"hg38\")) {\n",
    "            load(paste0(datapath, \"/gr/transfac3.rda\"))\n",
    "            gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
    "            tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "                i))\n",
    "            tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "            if (log2transform) {\n",
    "                tmp <- log2(tmp + 1)\n",
    "            }\n",
    "            if (adjustlen) {\n",
    "                grrange <- sapply(gr, function(i) sum(end(i) - \n",
    "                  start(i) + 1))\n",
    "                tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "            }\n",
    "            tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "            allres <- rbind(allres, tmp)\n",
    "        }\n",
    "    }\n",
    "    if (\"MOTIF_JASPAR\" %in% featurelist) {\n",
    "        print(\"Processing MOTIF_JASPAR features\")\n",
    "        load(paste0(datapath, \"/gr/jaspar1.rda\"))\n",
    "        gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
    "                1))\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "        load(paste0(datapath, \"/gr/jaspar2.rda\"))\n",
    "        gr <- flank(gr, as.numeric(Motifflank), both = T)\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- sapply(gr, function(i) sum(end(i) - start(i) + \n",
    "                1))\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "    }\n",
    "    if (\"GSEA\" %in% featurelist) {\n",
    "        print(\"Processing GSEA features\")\n",
    "        for (i in GSEAterm) {\n",
    "            load(paste0(datapath, \"/gr/GSEA\", i, \".rda\"))\n",
    "            allgr <- gr\n",
    "            for (sgrn in names(allgr)) {\n",
    "                gr <- allgr[[sgrn]]\n",
    "                if (GSEAstarttype == \"TSSup\") {\n",
    "                  grstart <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", start(gr) - as.numeric(GSEAstartbp), \n",
    "                    end(gr) + as.numeric(GSEAstartbp))\n",
    "                }\n",
    "                else if (GSEAstarttype == \"TSSdown\") {\n",
    "                  grstart <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", start(gr) + as.numeric(GSEAstartbp), \n",
    "                    end(gr) - as.numeric(GSEAstartbp))\n",
    "                }\n",
    "                else if (GSEAstarttype == \"TESup\") {\n",
    "                  grstart <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", end(gr) - as.numeric(GSEAstartbp), start(gr) + \n",
    "                    as.numeric(GSEAstartbp))\n",
    "                }\n",
    "                else if (GSEAstarttype == \"TESdown\") {\n",
    "                  grstart <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", end(gr) + as.numeric(GSEAstartbp), start(gr) - \n",
    "                    as.numeric(GSEAstartbp))\n",
    "                }\n",
    "                if (GSEAendtype == \"TSSup\") {\n",
    "                  grend <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", start(gr) - as.numeric(GSEAendbp), end(gr) + \n",
    "                    as.numeric(GSEAendbp))\n",
    "                }\n",
    "                else if (GSEAendtype == \"TSSdown\") {\n",
    "                  grend <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", start(gr) + as.numeric(GSEAendbp), end(gr) - \n",
    "                    as.numeric(GSEAendbp))\n",
    "                }\n",
    "                else if (GSEAendtype == \"TESup\") {\n",
    "                  grend <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", end(gr) - as.numeric(GSEAendbp), start(gr) + \n",
    "                    as.numeric(GSEAendbp))\n",
    "                }\n",
    "                else if (GSEAendtype == \"TESdown\") {\n",
    "                  grend <- ifelse(as.character(strand(gr)) == \n",
    "                    \"+\", end(gr) + as.numeric(GSEAendbp), start(gr) - \n",
    "                    as.numeric(GSEAendbp))\n",
    "                }\n",
    "                ngr <- names(gr)\n",
    "                gr <- GRanges(seqnames = seqnames(gr), IRanges(start = pmin(grstart, \n",
    "                  grend), end = pmax(grstart, grend)))\n",
    "                names(gr) <- ngr\n",
    "                allgr[[sgrn]] <- gr\n",
    "            }\n",
    "            gr <- allgr\n",
    "            tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "                i))\n",
    "            tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "            if (log2transform) {\n",
    "                tmp <- log2(tmp + 1)\n",
    "            }\n",
    "            if (adjustlen) {\n",
    "                grrange <- sapply(gr, function(i) sum(end(i) - \n",
    "                  start(i) + 1))\n",
    "                tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "            }\n",
    "            tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "            allres <- rbind(allres, tmp)\n",
    "        }\n",
    "    }\n",
    "    if (\"Custom\" %in% featurelist) {\n",
    "        print(\"Processing custom features\")\n",
    "        gr <- read.table(customfeature, as.is = T, sep = \"\\t\")\n",
    "        gr <- GRanges(seqnames = gr[, 1], IRanges(start = gr[, \n",
    "            2], end = gr[, 3]))\n",
    "        tmp <- sapply(bamdata, function(i) countOverlaps(gr, \n",
    "            i))\n",
    "        tmp <- sweep(tmp, 2, bamsummary, \"/\") * 10000\n",
    "        if (log2transform) {\n",
    "            tmp <- log2(tmp + 1)\n",
    "        }\n",
    "        if (adjustlen) {\n",
    "            grrange <- end(gr) - start(gr) + 1\n",
    "            tmp <- sweep(tmp, 1, grrange, \"/\") * 1e+06\n",
    "        }\n",
    "        tmp <- tmp[rowSums(tmp) > 0, , drop = F]\n",
    "        allres <- rbind(allres, tmp)\n",
    "    }\n",
    "    allres\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_out <- SCRATsummary(dir = \"../input/sc-bams_nodup/\", \n",
    "                               genome = \"mm9\",\n",
    "                               featurelist=\"MOTIF_JASPAR\",\n",
    "                               removeblacklist = FALSE,\n",
    "                               log2transform = FALSE, adjustlen = FALSE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "end_time <- Sys.time()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "end_time - start_time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "dim(df_out)\n",
    "df_out[1:5,1:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "head(sapply(strsplit(colnames(df_out), \"\\\\.\"),'[',2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "colnames(df_out) = sapply(strsplit(colnames(df_out), \"\\\\.\"),'[',2)\n",
    "dim(df_out)\n",
    "df_out[1:5,1:5]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if(! all(colnames(df_out) == rownames(metadata))){\n",
    "    df_out = df_out[,rownames(metadata)]\n",
    "    dim(df_out)\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "saveRDS(df_out, file = '../output/feature_matrices/FM_SCRAT_cusanovich2018subset_motifs_no_blacklist_filtering.rds')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sessionInfo()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "save.image(file = '../output/SCRAT_cusanovich2018subset_no_blacklist_filtering.RData')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "R [conda env:ATACseq_SCRAT]",
   "language": "R",
   "name": "conda-env-ATACseq_SCRAT-r"
  },
  "language_info": {
   "codemirror_mode": "r",
   "file_extension": ".r",
   "mimetype": "text/x-r-source",
   "name": "R",
   "pygments_lexer": "r",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}