## Load libraries and functions for array processing and QA/QC plots source('./scripts/array_qa_qc_functions.r') ## Read the annotation spreadsheet into R ## You will have to change the directory path annot_dir = '/Users/mooneymi/Documents/MyDocuments/SystemsImmunogenetics/Expression/Bat_Virus_Array' setwd(annot_dir) sample_annot = read.xls('BatPlate_Annotation_editedMM.xlsx', header=T, as.is=T, na.strings=c(""," ", "NA", "#DIV/0!")) rownames(sample_annot) = sample_annot$ID ## Check annotation dataframe head(sample_annot[,1:5]) ## Set directory where .CEL files are located, and get the list of files ## You will have to change the directory path cel_dir = '/Users/mooneymi/Documents/MyDocuments/SystemsImmunogenetics/Expression/Bat_Virus_Array/data' cel_files = list.celfiles(cel_dir) ## Check file names cel_files[1:3] ## Create sample names (these must match the annotation file) sample_names = gsub("_2.CEL", "", cel_files) ## Check sample names sample_names[1:5] ## Subset sample annotation dataframe sample_annot = sample_annot[sample_names,] length(sample_names) == dim(sample_annot)[1] ## Create a phenoData object phenoData = new("AnnotatedDataFrame", data=sample_annot) phenoData ## Load the raw expression data raw.exprs = read.celfiles(file.path(cel_dir, cel_files), pkgname="pd.mogene.2.1.st", sampleNames=sample_names, phenoData=phenoData) head(pData(phenoData)[,1:5]) ## Save raw expression to file in same directory as .CEL files ## This may be used as input for DE and Pathway analysis save(raw.exprs, file=file.path(cel_dir, 'bat_virus_raw_exprs_2-FEB-2016.rda')) ## Create un-normalized ExpressionSet bgcor.exprs = rma(raw.exprs, normalize=FALSE, target="core") ## Create normalized ExpressionSet norm.exprs = rma(raw.exprs, normalize=TRUE, target="core") ## Check normalized expression matrix exprs(norm.exprs)[1:5,1:5] ## Save normalized expression to file (optional) #save(norm.exprs, file="bat_virus_array_normalized.rda") describe(make.boxplot) ## Boxplot of un-normalized expression values make.boxplot(bgcor.exprs, type = 'raw', order.by=c("Mating", "Number"), color.by="Mating", make.pdf=F) ## Boxplot of normalized expression values make.boxplot(norm.exprs, type = 'norm', order.by=c("Mating", "Number"), color.by="Mating", make.pdf=F) describe(plot.bac.spikes) ## The probe group file from Affy for the array pg_file = '/Users/mooneymi/Documents/MyDocuments/SystemsImmunogenetics/Expression/MoGene-2_1-st.pgf' ## Create bacterial spike plot plot.bac.spikes(norm.exprs, pg_file, make.pdf=F) describe(plot.polya.spikes) ## Create polyA spike plot plot.polya.spikes(norm.exprs, pg_file, make.pdf=F) describe(make.heatmap) ## Create annotated heatmap (don't include MAQC in heatmap) make.heatmap(norm.exprs[, norm.exprs$ID != 'MAQC'], base.factors=c('Sex', 'D4_percent'), make.pdf=F) ## Load MAQC Annotations maqc_annot_dir = '/Users/mooneymi/Documents/MyDocuments/SystemsImmunogenetics/Expression/MAQC' maqc_annot = read.xls(file.path(maqc_annot_dir, 'maqc_annotation.xlsx'), header=T, as.is=T, na.strings=c(""," ", "NA", "#DIV/0!")) rownames(maqc_annot) = maqc_annot$ID ## Set directory where MAQC .CEL files are located, and get the list of files ## You will have to change the directory path maqc_cel_dir = '/Users/mooneymi/Documents/MyDocuments/SystemsImmunogenetics/Expression/MAQC' maqc_cel_files = list.celfiles(maqc_cel_dir) ## Check file names maqc_cel_files[1:3] ## Create sample names (these must match the annotation file) maqc_names = gsub(".CEL", "", maqc_cel_files) ## Create a phenoData object maqcData = new("AnnotatedDataFrame", data=maqc_annot) maqcData ## Load the MAQC raw expression data maqc.raw.exprs = read.celfiles(file.path(maqc_cel_dir, maqc_cel_files), pkgname="pd.mogene.2.1.st", sampleNames=maqc_names, phenoData=maqcData) ## Create un-normalized MAQC ExpressionSet maqc.bgcor.exprs = rma(maqc.raw.exprs, normalize=FALSE, target="core") ## Create normalized MAQC ExpressionSet maqc.norm.exprs = rma(maqc.raw.exprs, normalize=TRUE, target="core") ## Create boxplots make.boxplot(maqc.bgcor.exprs, type = 'raw', order.by=c("Date.Downloaded"), color.by="Date.Downloaded", make.pdf=F) ## Create boxplots make.boxplot(maqc.norm.exprs, type = 'norm', order.by=c("Date.Downloaded"), color.by="Date.Downloaded", make.pdf=F)