library(gdata) library(oligo) library(pd.mogene.2.1.st) library(mogene21sttranscriptcluster.db) library(Heatplus) library(ggplot2) library(reshape2) ## Help Documentation describe = function(obj) { if ('help' %in% names(attributes(obj))) { writeLines(attr(obj, 'help')) } } attr(describe, 'help') = " This function prints the contents of the 'help' attribute of any R object. It is meant to provide help documentation in the same vein as Docstrings in Python. " make.colors.boxplot <- function(col.strains) { use.cols <- darkColors(length(unique(col.strains))) names(use.cols) <- unique(col.strains) return(as.character(use.cols[col.strains])) } make.boxplot <- function(use.exprs, type=c("raw", "norm"), make.pdf=F, base.name="test", order.by=c("Mating", "Sex", "Number"), color.by="Mating", highlight.names=NULL, ...) { type <- match.arg(type) #use.exprs <- switch(type, raw=rma(raw.exprs, normalize=FALSE, target="core"), norm=rma(raw.exprs, normalize=TRUE, target="core")) basic.ord <- do.call("order", pData(use.exprs)[,order.by,drop=F]) use.exprs <- use.exprs[,basic.ord] layout(c(1,1,1,1,2,3,3,3)) par(mar=c(0,4,4,2)) if (type == 'raw') {title = "Raw Expression (Background Corrected)"} if (type == 'norm') {title = "Normalized Expression"} boxplot(exprs(use.exprs), xaxt="n", ylab="Expression", main=title, outline=FALSE, col=make.colors.boxplot(pData(use.exprs)[,color.by])) #RIN subplot if ('RIN' %in% names(pData(use.exprs))) { par(mar=c(0,4,1,2)) pData(use.exprs)$RIN = as.numeric(as.character(pData(use.exprs)$RIN)) plot(x=1:ncol(use.exprs), y=pData(use.exprs)$RIN, xlim=c(1, ncol(use.exprs)) + c(-.5, .5), ylim=range(pData(use.exprs)$RIN, na.rm=T), xaxt="n", type="p", ylab="RIN", xlab="", col=make.colors.boxplot(pData(use.exprs)[,color.by])) abline(h=7, lty="dashed") } else { warning("RIN values were not found in the sample annotations.") } #labels and such if (missing(highlight.names) || is.null(highlight.names)) { Axis(at=1:ncol(use.exprs), side=1, labels=colnames(use.exprs), las=2, ...) } else if(is.character(highlight.names) && all(highlight.names %in% colnames(use.exprs))) { should.col <- ifelse(colnames(use.exprs) %in% highlight.names, 'red', 'black') rle.col <- Rle(should.col) lo.pos <- 1 for (i in 1:nrun(rle.col)) { positions <- lo.pos:(runLength(rle.col)[i] + lo.pos-1) Axis(at=positions, side=1, labels=colnames(use.exprs)[positions], las=2, col.axis=runValue(rle.col)[i], ...) lo.pos <- max(positions) + 1 } Axis(at=1:ncol(use.exprs), side=1, labels=colnames(use.exprs), las=2, col.axis=ifelse(should.col, 'red', 'black'), ...) } if (make.pdf) { d = dev.copy2pdf(file=paste0(base.name, '_', type, '_boxplot.pdf'), width=16, height=10) writeLines(paste0("Figure saved to: ", paste0(base.name, '_', type, '_boxplot.pdf'))) d = dev.off() } } attr(make.boxplot, 'help') = " This function creates a boxplot of the raw or normalized expression values. If RIN values are available a subplot will be added. Parameters: use.exprs: An ExpressionSet returned by the rma() function. type: The type of plot to create, either 'raw' or 'norm'. make.pdf: A logical indicating if a PDF should be created. base.name: The base filename of the PDF to create (default is 'test') order.by: A character vector containing the column names that will be used to order the samples (default is c('Mating', 'Sex', 'Number')). color.by: The column name used to assign colors to the samples (default is 'Mating'). highlight.names: A character vector containing sample names (default=NULL). Can be used to highlight samples in the plot. ...: Additional parameters can be passed to format the x-axis labels. " #A convienience function for making bacterial spike plots #Input: # raw.exprs: A GeneFeatureSet or similar object which has an rma method returning an ExpressionSet # base.name: The base name of the PDF to generate (if make.pdf == T) # pgf.file: probe group file from Affy for the appropriate array type. #Output: # A PDF named as base.name_bac_spikes.pdf plot.bac.spikes <- function(use.exprs, pgf.file, base.name="test", make.pdf=F) { require(ggplot2) require(affxparser) #use.exprs <- rma(raw.exprs) if (file.exists(pgf.file)) { pgf.list <- readPgf(pgf.file) } else { stop("Probe group file not found!") } probeset.types <- data.frame(fsetid=pgf.list$probesetId, fsetName=pgf.list$probesetName, type=pgf.list$probesetType, stringsAsFactors=FALSE) spike.probes <- probeset.types[probeset.types$type == 'control->affx->bac_spike',] spike.exprs <- exprs(use.exprs)[as.character(unique(spike.probes$fsetid)),] spike.dta <- melt(spike.exprs) spike.dta.merge <- merge(spike.dta, spike.probes, by.x="Var1", by.y="fsetid", all=F, incomparables=NA, sort=FALSE) spike.dta.merge$`Bac. Spike` <- sapply(strsplit(spike.dta.merge$fsetName, "-"), "[[", 4) #should be BioBaffx->polya_spike",] spike.exprs <- exprs(use.exprs)[as.character(unique(poly.a.spikes$fsetid)),] spike.dta <- melt(spike.exprs) spike.dta.merge <- merge(spike.dta, poly.a.spikes, by.x="Var1", by.y="fsetid", all=F, incomparables=NA, sort=FALSE) spike.dta.merge$type <- ifelse(grepl("AFFX-r2-Bs", spike.dta.merge$fsetName), "AFFX-r2-Bs", "AFFX") spike.dta.merge$pos <- sub("_*s*_[as]t", "", sapply(strsplit(spike.dta.merge$fsetName, "-"), function(x) x[length(x)])) spike.dta.merge$pos <- factor(spike.dta.merge$pos, levels=c("5", "M", "3"), ordered=T) spike.dta.merge$direction <- sapply(strsplit(spike.dta.merge$fsetName, "[-_]"), function(x) x[length(x)]) split.fset <- strsplit(spike.dta.merge$fsetName, "-") spike.dta.merge$Spike <- ifelse(spike.dta.merge$type == "AFFX", sapply(split.fset, "[", 2), sapply(split.fset, "[", 4)) spike.dta.merge$Spike <- sub("X", "", spike.dta.merge$Spike) substr(spike.dta.merge$Spike, 1, 1) <- toupper(substr(spike.dta.merge$Spike, 1, 1)) #from the affy exon/gene array whitepaper: lys