--- title: "WGBS Data Analysis" output: html_notebook --- ```{r, include = T} library(DSS) library(bsseq) library(gdata) library(dplyr) library(kableExtra) library(ggplot2) library(knitr) ``` ```{r echo=FALSE} colFmt = function(x,color){ outputFormat = opts_knit$get("rmarkdown.pandoc.to") if(outputFormat == 'latex') paste("\\textcolor{",color,"}{",x,"}",sep="") else if(outputFormat == 'html') paste("",x,"",sep="") else x } ``` ### Preprocessing ```{r echo=TRUE} # get sample names samples <- read.table(file="../scripts/allsamples.txt", sep="\t", header=F, stringsAsFactors=F)$V1 # read in methylation data in tab delimited format: dat.list <- vector(mode = "list", length = length(samples)) for (i in 1:length(samples)){ dat.list[[i]] <- read.table(paste0("../04-Methylation/", samples[i], ".CpG.counts.txt"), header=F, col.names=c("chr", "pos", "N", "X")) } # create BSseqData and save for future easy loading BSobj <- makeBSseqData(dat.list, samples) save(BSobj, file="BSobj.RData") #load("BSobj.RData") ```
The original data included `r dim(BSobj)[1]` CpG sites across all samples. ```{r echo=TRUE} # remove unplaced contigs from reference chr.use <- paste0("chr", c(1:22, "M", "X", "Y")) dat <- chrSelectBSseq(BSobj, seqnames= chr.use, order=T) ```
CpG sites from unplaced contigs were removed, leaving `r dim(dat)[1]` CpG sites.
Multidimensional scaling (MDS) plot of raw methylation data.
```{r echo=TRUE} # generate meta data from samples stage <- c(rep("3M", 3), rep("24M", 3)) pdata <- data.frame(Samples=samples, EM.stage=stage, stringsAsFactors=F) pdata$EM.stage <- factor(pdata$EM.stage, levels=c("3M", "24M")) idx <- match(samples, pdata$Samples) pdata <- pdata[idx,] pchs <- ifelse(pdata$EM.stage == "3M", 15, 16) # # MDS plot of raw methylation percentages pct.meth <- getMeth(dat, type = "raw") limma::plotMDS(asin(pct.meth), col = "blue", pch = pchs) legend("bottomright", title = "EM.stage", legend = levels(pdata$EM.stage), pch = unique(pchs), text.font = 0.8) ```
### Differential methylation analyses Differential methylation analyses were conducted in [DSS](https://www.bioconductor.org/packages/release/bioc/manuals/DSS/man/DSS.pdf) bioconductor package.
##### Comparison EM.24M vs EM.3M Wald test was used. #### Output files include the following: * mu1, mu2: group means * diff, diff.se: differece between group means and the standard error of differences * stat: hypothesis test statistics (Wald test) * phi1, phi2: dispersions * pvals: Raw p-value from test that the methylation difference differs from 0 * fdrs: Benjamini-Hochberg false discovery rate (FDR) adjusted p-value
```{r, echo = FALSE, message = FALSE, warning = FALSE} # run test of differential methylation for each CpG site and save for future easy loading dmlTest <- DMLtest(BSobj, group1=samples[1:3], group2=samples[4:6], smoothing=TRUE) save(dmlTest, file="DML.RData") #load("DML.RData") # call differential methylated CpG dmls <- callDML(dmlTest, p.threshold=1.0) # write to file results write.table(dmls, file="Differential_methylation_CpG_24Mvs3M.txt", sep="\t", col.names=T, row.names=F, quote=F) ```
```{r echo = FALSE} # list the top 500 differentially methylated CpG sites (ordered by P values) n <- ncol(dmls) header1 <- n names(header1) <- paste0("Number of differentially methylated CpGs with FDR adjusted P < 0.05 = ", length(which(dmls$fdr < 0.05))) header2 <- n names(header2) <- "Top 500 CpG sites" kable(dmls[1:500,], align = 'c', row.names=F) %>% add_header_above(header1, align = 'l') %>% add_header_above(header2, font_size = "larger", bold = T, align = "l") %>% kable_styling(bootstrap_options = c("hover", "condensed", "responsive"), full_width = T) %>% scroll_box(height = "500px") ```
```{r echo = FALSE} # call differentially methylated regions (DMR) dmrs <- callDMR(dmlTest, delta=0.1, p.threshold=0.01) write.table(dmrs, file="Differential_methylation_regions_24Mvs3M.txt", sep="\t", col.names=T, row.names=F, quote=F) ``` ```{r echo = FALSE} # list DMRs n <- ncol(dmrs) header1 <- n names(header1) <- paste0("Number of differentially methylated regions = ", nrow(dmrs)) header2 <- n names(header2) <- "The list of DMRs:" kable(dmrs, align = 'c', row.names=F) %>% add_header_above(header1, align = 'l') %>% add_header_above(header2, font_size = "larger", bold = T, align = "l") %>% kable_styling(bootstrap_options = c("hover", "condensed", "responsive"), full_width = T) %>% scroll_box(height = "500px") ```
##### Visualization of individual DMR
```{r echo=FALSE} showOneDMR(dmrs[1,], BSobj) ```
### R session information ```{r} sessionInfo() ```