## ----knitrsetup, include=FALSE, cache=FALSE------------------------------ library("knitr") opts_chunk$set(width=40, tidy.opts=list(width.cutoff=45), tidy=FALSE, fig.path=file.path("figures", "species/"), fig.align="center", fig.height=4.25, comment=NA, prompt=FALSE) ## ----setup, echo=TRUE, eval=FALSE---------------------------------------- # install.packages(c("MALDIquant", "MALDIquantForeign", # "sda", "crossval", "devtools")) # library("devtools") # install_github("sgibb/MALDIquantExamples") ## ----loadpackages, echo=FALSE-------------------------------------------- suppressPackageStartupMessages(library("MALDIquantExamples")) suppressPackageStartupMessages(library("xtable")) ## ----packages------------------------------------------------------------ ## the main MALDIquant package library("MALDIquant") ## the import/export routines for MALDIquant library("MALDIquantForeign") ## example data library("MALDIquantExamples") ## ----import-------------------------------------------------------------- spectra <- import(getPathSpecies(), verbose=FALSE) ## ----qc------------------------------------------------------------------ table(sapply(spectra, length)) any(sapply(spectra, isEmpty)) all(sapply(spectra, isRegular)) ## ----trim---------------------------------------------------------------- spectra <- trim(spectra) ## ----plotseed, echo=FALSE------------------------------------------------ set.seed(123) ## ----plot---------------------------------------------------------------- idx <- sample(length(spectra), size=2) plot(spectra[[idx[1]]]) plot(spectra[[idx[2]]]) ## ----vs------------------------------------------------------------------ spectra <- transformIntensity(spectra, method="sqrt") ## ----fwhm---------------------------------------------------------------- plot(spectra[[1]], type="b", xlim=c(2235.3, 2252.0), ylim=c(45, 100)) abline(h=72, col=4, lty=2) plot(spectra[[1]], type="b", xlim=c(11220, 11250), ylim=c(24, 40)) abline(h=32, col=4, lty=2) ## ----sm------------------------------------------------------------------ spectra <- smoothIntensity(spectra, method="SavitzkyGolay", halfWindowSize=10) ## ----be------------------------------------------------------------------ ## define iteration steps: 25, 50, ..., 100 iterations <- seq(from=25, to=100, by=25) ## define different colors for each step col <- rainbow(length(iterations)) plot(spectra[[1]], xlim=c(2000, 12000)) ## draw different baseline estimates for (i in seq(along=iterations)) { baseline <- estimateBaseline(spectra[[1]], method="SNIP", iterations=iterations[i]) lines(baseline, col=col[i], lwd=2) } legend("topright", legend=iterations, col=col, lwd=1) ## ----bc------------------------------------------------------------------ spectra <- removeBaseline(spectra, method="SNIP", iterations=25) plot(spectra[[1]]) ## ----cb------------------------------------------------------------------ spectra <- calibrateIntensity(spectra, method="TIC") ## ----pa------------------------------------------------------------------ spectra <- alignSpectra(spectra) ## ----metadata------------------------------------------------------------ metaData(spectra[[1]])$spot ## ----spots--------------------------------------------------------------- spots <- sapply(spectra, function(x)metaData(x)$spot) species <- sapply(spectra, function(x)metaData(x)$sampleName) head(spots) head(species) ## ----average------------------------------------------------------------- avgSpectra <- averageMassSpectra(spectra, labels=paste0(species, spots)) ## ----noise--------------------------------------------------------------- ## define snrs steps: 1, 1.5, ... 2.5 snrs <- seq(from=1, to=2.5, by=0.5) ## define different colors for each step col <- rainbow(length(snrs)) ## estimate noise noise <- estimateNoise(avgSpectra[[1]], method="SuperSmoother") plot(avgSpectra[[1]], xlim=c(6000, 16000), ylim=c(0, 0.0016)) for (i in seq(along=snrs)) { lines(noise[, "mass"], noise[, "intensity"]*snrs[i], col=col[i], lwd=2) } legend("topright", legend=snrs, col=col, lwd=1) ## ----pd------------------------------------------------------------------ peaks <- detectPeaks(avgSpectra, SNR=2, halfWindowSize=10) ## ----pdp----------------------------------------------------------------- plot(avgSpectra[[1]], xlim=c(6000, 16000), ylim=c(0, 0.0016)) points(peaks[[1]], col="red", pch=4) ## ----pb------------------------------------------------------------------ peaks <- binPeaks(peaks) ## ----pf------------------------------------------------------------------ peaks <- filterPeaks(peaks, minFrequency=0.25) ## ----spots2-------------------------------------------------------------- spots <- sapply(avgSpectra, function(x)metaData(x)$spot) species <- sapply(avgSpectra, function(x)metaData(x)$sampleName) species <- factor(species) # convert to factor # (needed later in crossval) ## ----fm------------------------------------------------------------------ featureMatrix <- intensityMatrix(peaks, avgSpectra) rownames(featureMatrix) <- paste(species, spots, sep=".") ## ----clust, fig.height=5------------------------------------------------- library("pvclust") pv <- pvclust(t(featureMatrix), method.hclust="ward.D2", method.dist="euclidean") plot(pv, print.num=FALSE) ## ----dda, fig.height=7.5------------------------------------------------- library("sda") ddar <- sda.ranking(Xtrain=featureMatrix, L=species, fdr=FALSE, diagonal=TRUE) plot(ddar) ## ----lda, fig.height=7.5------------------------------------------------- ldar <- sda.ranking(Xtrain=featureMatrix, L=species, fdr=FALSE, diagonal=FALSE) plot(ldar) ## ----predictfuncv-------------------------------------------------------- library("crossval") predfun <- function(Xtrain, Ytrain, Xtest, Ytest, numVars, diagonal=FALSE) { # estimate ranking and determine the best numVars variables ra <- sda.ranking(Xtrain, Ytrain, verbose=FALSE, diagonal=diagonal, fdr=FALSE) selVars <- ra[,"idx"][1:numVars] # fit and predict sda.out <- sda(Xtrain[, selVars, drop=FALSE], Ytrain, diagonal=diagonal, verbose=FALSE) ynew <- predict(sda.out, Xtest[, selVars, drop=FALSE], verbose=FALSE)$class # compute accuracy acc <- mean(Ytest == ynew) return(acc) } ## ----cvsetup------------------------------------------------------------- K <- 5 # number of folds B <- 20 # number of repetitions ## ----cvtop10------------------------------------------------------------- set.seed(12345) cv.dda10 <- crossval(predfun, X=featureMatrix, Y=species, K=K, B=B, numVars=10, diagonal=FALSE, verbose=FALSE) cv.dda10$stat ## ----cvoptimaldd--------------------------------------------------------- npeaks <- c(1:15, ncol(featureMatrix)) # number of peaks ## ----cvoptimaldda-------------------------------------------------------- # estimate accuracy for DDA set.seed(12345) cvsim.dda <- sapply(npeaks, function(i) { cv <- crossval(predfun, X=featureMatrix, Y=species, K=K, B=B, numVars=i, diagonal=TRUE, verbose=FALSE) return(cv$stat) }) ## ----cvoptimallda-------------------------------------------------------- # estimate accuracy for LDA set.seed(12345) cvsim.lda <- sapply(npeaks, function(i) { cv <- crossval(predfun, X=featureMatrix, Y=species, K=K, B=B, numVars=i, diagonal=FALSE, verbose=FALSE) return(cv$stat) }) ## ----cvoptimaltable------------------------------------------------------ result.sim <- cbind(nPeaks=npeaks, "DDA-ACC"=cvsim.dda, "LDA-ACC"=cvsim.lda) ## ----cvoptimaltablelatex, echo=FALSE, results="asis"--------------------- xtable(result.sim, booktabs=TRUE, digits=c(0, 0, 3, 3)) ## ----sessioninfo, echo=FALSE, results="asis"----------------------------- toLatex(sessionInfo(), locale=FALSE)