---
title: "WGBS Data Analysis"
output:
html_notebook
---
```{r, include = T}
library(DSS)
library(bsseq)
library(gdata)
library(dplyr)
library(kableExtra)
library(ggplot2)
library(knitr)
```
```{r echo=FALSE}
colFmt = function(x,color){
outputFormat = opts_knit$get("rmarkdown.pandoc.to")
if(outputFormat == 'latex')
paste("\\textcolor{",color,"}{",x,"}",sep="")
else if(outputFormat == 'html')
paste("",x,"",sep="")
else
x
}
```
### Preprocessing
```{r echo=TRUE}
# get sample names
samples <- read.table(file="../scripts/allsamples.txt", sep="\t", header=F, stringsAsFactors=F)$V1
# read in methylation data in tab delimited format:
dat.list <- vector(mode = "list", length = length(samples))
for (i in 1:length(samples)){
dat.list[[i]] <- read.table(paste0("../04-Methylation/", samples[i], ".CpG.counts.txt"), header=F, col.names=c("chr", "pos", "N", "X"))
}
# create BSseqData and save for future easy loading
BSobj <- makeBSseqData(dat.list, samples)
save(BSobj, file="BSobj.RData")
#load("BSobj.RData")
```
The original data included `r dim(BSobj)[1]` CpG sites across all samples.
```{r echo=TRUE}
# remove unplaced contigs from reference
chr.use <- paste0("chr", c(1:22, "M", "X", "Y"))
dat <- chrSelectBSseq(BSobj, seqnames= chr.use, order=T)
```
CpG sites from unplaced contigs were removed, leaving `r dim(dat)[1]` CpG sites.
Multidimensional scaling (MDS) plot of raw methylation data.
```{r echo=TRUE}
# generate meta data from samples
stage <- c(rep("3M", 3), rep("24M", 3))
pdata <- data.frame(Samples=samples, EM.stage=stage, stringsAsFactors=F)
pdata$EM.stage <- factor(pdata$EM.stage, levels=c("3M", "24M"))
idx <- match(samples, pdata$Samples)
pdata <- pdata[idx,]
pchs <- ifelse(pdata$EM.stage == "3M", 15, 16)
#
# MDS plot of raw methylation percentages
pct.meth <- getMeth(dat, type = "raw")
limma::plotMDS(asin(pct.meth), col = "blue", pch = pchs)
legend("bottomright", title = "EM.stage", legend = levels(pdata$EM.stage), pch = unique(pchs), text.font = 0.8)
```
### Differential methylation analyses
Differential methylation analyses were conducted in [DSS](https://www.bioconductor.org/packages/release/bioc/manuals/DSS/man/DSS.pdf) bioconductor package.
##### Comparison EM.24M vs EM.3M
Wald test was used.
#### Output files include the following:
* mu1, mu2: group means
* diff, diff.se: differece between group means and the standard error of differences
* stat: hypothesis test statistics (Wald test)
* phi1, phi2: dispersions
* pvals: Raw p-value from test that the methylation difference differs from 0
* fdrs: Benjamini-Hochberg false discovery rate (FDR) adjusted p-value
```{r, echo = FALSE, message = FALSE, warning = FALSE}
# run test of differential methylation for each CpG site and save for future easy loading
dmlTest <- DMLtest(BSobj, group1=samples[1:3], group2=samples[4:6], smoothing=TRUE)
save(dmlTest, file="DML.RData")
#load("DML.RData")
# call differential methylated CpG
dmls <- callDML(dmlTest, p.threshold=1.0)
# write to file results
write.table(dmls, file="Differential_methylation_CpG_24Mvs3M.txt", sep="\t", col.names=T, row.names=F, quote=F)
```
```{r echo = FALSE}
# list the top 500 differentially methylated CpG sites (ordered by P values)
n <- ncol(dmls)
header1 <- n
names(header1) <- paste0("Number of differentially methylated CpGs with FDR adjusted P < 0.05 = ", length(which(dmls$fdr < 0.05)))
header2 <- n
names(header2) <- "Top 500 CpG sites"
kable(dmls[1:500,], align = 'c', row.names=F) %>%
add_header_above(header1, align = 'l') %>%
add_header_above(header2, font_size = "larger", bold = T, align = "l") %>%
kable_styling(bootstrap_options = c("hover", "condensed", "responsive"), full_width = T) %>%
scroll_box(height = "500px")
```
```{r echo = FALSE}
# call differentially methylated regions (DMR)
dmrs <- callDMR(dmlTest, delta=0.1, p.threshold=0.01)
write.table(dmrs, file="Differential_methylation_regions_24Mvs3M.txt", sep="\t", col.names=T, row.names=F, quote=F)
```
```{r echo = FALSE}
# list DMRs
n <- ncol(dmrs)
header1 <- n
names(header1) <- paste0("Number of differentially methylated regions = ", nrow(dmrs))
header2 <- n
names(header2) <- "The list of DMRs:"
kable(dmrs, align = 'c', row.names=F) %>%
add_header_above(header1, align = 'l') %>%
add_header_above(header2, font_size = "larger", bold = T, align = "l") %>%
kable_styling(bootstrap_options = c("hover", "condensed", "responsive"), full_width = T) %>%
scroll_box(height = "500px")
```
##### Visualization of individual DMR
```{r echo=FALSE}
showOneDMR(dmrs[1,], BSobj)
```
### R session information
```{r}
sessionInfo()
```