--- title: "Sarcoid Microbiome Figures and Data" author: "Erik Clarke" date: "4/20/2017" output: html_document params: data_fp: "/home/ecl/data/000_Sarcoidosis/099_Manuscript/data_files" figure_fp: "/home/ecl/data/000_Sarcoidosis/099_Manuscript/Figures" table_fp: "/home/ecl/data/000_Sarcoidosis/099_Manuscript/Tables" --- ```{r setup, error=FALSE} knitr::opts_chunk$set(error=FALSE) library(SarcoidMicrobiome) library(ggbeeswarm) library(forcats) library(stringr) library(ggplot2) library(dplyr) opts$data_fp <- params$data_fp opts$figure_fp <- params$figure_fp opts$table_fp <- params$table_fp theme_set( theme_classic(base_size = 12) + theme( plot.title = element_text(hjust=0), axis.ticks = element_line(size=0.75), axis.line.x = element_line(colour = 'black', size=0.6, linetype='solid'), axis.line.y = element_line(colour = 'black', size=0.6, linetype='solid'), strip.background = element_blank(), strip.text = element_text(face = "bold", hjust=0) ) ) # Green/brown colors # top.barchart.fills <- c( # "Healthy lymph node"="#a6611a", # "Healthy BAL"="#a6611a", # "Healthy paraffin"="#C0A675", # "Healthy prewash"="#C0A675", # "Sarcoidosis lymph node"="#018571", # "Sarcoidosis BAL"="#018571", # "Sarcoidosis paraffin"="#86C3AE", # "Sarcoidosis prewash"="#86C3AE" # ) # Red/blue colors top.barchart.fills <- c( "Healthy lymph node"="#1f78b4", "Healthy BAL"="#1f78b4", "Healthy paraffin"="#a6cee3", "Healthy prewash"="#a6cee3", "Sarcoidosis lymph node"="#ff7f00", "Sarcoidosis BAL"="#ff7f00", "Sarcoidosis paraffin"="#fdbf6f", "Sarcoidosis prewash"="#fdbf6f" ) # Virus contaminants tech.contam <- c( "Enterobacteria phage M13", "Enterobacteria phage T7", "Enterobacteria phage phiX174 sensu lato","Bacillus phage phi29", "Pseudomonas phage phi6", "Shamonda virus", "Human herpesvirus 7", "Human herpesvirus 6", "Cyprinid herpesvirus 3") ``` ```{r load-data} a.16s <- LoadData("A", "16S") a.its <- LoadData("A", "ITS") b.16s <- LoadData("B", "16S") b.its <- LoadData("B", "ITS") c.16s <- LoadData("C", "16S") c.its <- LoadData("C", "ITS") c.vir <- LoadData("C", "virome") de.16s <- LoadData("DE", "16S") de.its <- LoadData("DE", "ITS") de.wgs <- LoadData("DE", "WGS") # Reconcile and anonymize all the sample names (e.g. subjects) between # 16S/ITS/WGS sequencings of a set # BAL: .c.vir.subjectid <- as.character(c.vir$s$OriginatingSampleID) .c.vir.subjectid <- factor(str_replace(.c.vir.subjectid, "HUP", "")) .bal.unified <- fct_unify(list( droplevels(c.16s$s$DerivingOriginalSampleID), droplevels(c.its$s$DerivingSampleID), .c.vir.subjectid)) c.16s$s$SubjectID <- fct_anon_deterministic(.bal.unified[[1]], prefix="C") c.16s$agg <- left_join(c.16s$agg, c.16s$s) c.its$s$SubjectID <- fct_anon_deterministic(.bal.unified[[2]], prefix="C") c.its$agg <- left_join(c.its$agg, c.its$s) c.vir$s$SubjectID <- fct_anon_deterministic(.bal.unified[[3]], prefix="C") c.vir$agg <- left_join(c.vir$agg, c.vir$s) # LN/A: .a.all <- fct_unify(list( factor(a.16s$s$OriginatingSampleID), factor(a.its$s$OriginatingSampleID) )) a.16s$s$SubjectID <- fct_anon_deterministic(.a.all[[1]], prefix="A") a.16s$agg <- left_join(a.16s$agg, a.16s$s) a.its$s$SubjectID <- fct_anon_deterministic(.a.all[[2]], prefix="A") a.its$agg <- left_join(a.its$agg, a.its$s) # LN/B: .lnb.unified <- fct_unify(list( factor(make.names(as.character(b.16s$s$DerivingOriginalSampleID), allow_=FALSE)), factor(b.its$s$SampleID) )) b.16s$s$SubjectID <- fct_anon_deterministic(.lnb.unified[[1]], prefix="B") b.16s$agg <- left_join(b.16s$agg, b.16s$s) b.its$s$SubjectID <- fct_anon_deterministic(.lnb.unified[[2]], prefix="B") b.its$agg <- left_join(b.its$agg, b.its$s) ``` ## Fig 1: Set A Barcharts ```{r a-16s-barcharts} invisible(with(a.16s, { agg %>% filter(SampleType %in% c("lymph_node", "paraffin", "extr_blank")) %>% mutate(SampleType = fct_recode(SampleType, "water"="extr_blank")) %>% mutate(SampleType = relevel(SampleType, "paraffin")) %>% MakeBarchartData() %>% PlotBarchart() %>% SplitBarcharts() %>% SaveBarcharts( file.path(opts$figure_fp, sprintf("Fig1A_Set%s_%s_%%s.pdf", sampleset, datatype))) })) ``` ```{r a-its-barcharts} invisible(with(a.its, { # browser() agg %>% filter(SampleType %in% c("lymph_node", "paraffin", "extr_blank")) %>% mutate(SampleType = fct_recode(SampleType, "water"="extr_blank")) %>% mutate(SampleType = relevel(SampleType, "paraffin")) %>% ungroup() %>% mutate( MinRankOrder = SarcoidMicrobiome:::min_rank(., "Order")) %>% mutate( MinRankOrder = forcats::fct_recode( MinRankOrder, Other="Fungi", Other="uncultured fungus (phylum)", Other="Indeterminate", Other="uncultured Acremonium (order)")) %>% MakeBarchartData(taxa.level="MinRankOrder") %>% PlotBarchart() %>% SplitBarcharts() %>% SaveBarcharts( file.path(opts$figure_fp, sprintf("Fig1B_Set%s_%s_%%s.pdf", sampleset, datatype))) })) ``` ```{r a-cladosporiaceae} invisible(within(a.its, { # browser() .s <- s %>% select(SampleID, StudyGroup, SampleType, SubjectID) clado <- agg %>% filter(!SampleType %in% c("extr_blank", "pcr_h2o")) %>% select(SampleID, Family, count) %>% filter(Family == "Cladosporiaceae") %>% count(SampleID, wt=count) %>% droplevels() %>% left_join(s) %>% select(SampleID, n, SampleType, SubjectID) %>% complete(SampleType, SubjectID, fill=list(n=0)) %>% filter(SampleType %in% c("paraffin", "lymph_node")) %>% select(-SampleID) %>% left_join(.s) %>% filter(!is.na(SampleID)) .s2 <- .s %>% distinct(SubjectID, StudyGroup) %>% droplevels() clado.diff <- group_by(clado, SubjectID) %>% select(SubjectID, n, SampleType) %>% spread(SampleType, n) %>% mutate(delta=lymph_node-paraffin) %>% ungroup() %>% distinct(SubjectID, delta) %>% left_join(.s2) %>% mutate(delta.log10=sign(delta)*log10(abs(delta))) %>% mutate(delta.log10=ifelse(is.nan(delta.log10), 0, delta.log10)) %>% mutate(sign = delta.log10 > 0) p <- ggplot(clado.diff, aes(x=StudyGroup, y=delta.log10)) + geom_hline(yintercept=0, linetype=2) + geom_boxplot(fill=NA, size=0.5, fatten=4) + # scale_fill_manual(values=pcoa.fills, guide=FALSE) + # scale_color_manual(values=pcoa.colors, guide=FALSE) + # scale_color_manual("", # values=c("TRUE"="black", "FALSE"="black"), # labels=c("TRUE"="Greater in sample", "FALSE"="No greater in sample than env."), # breaks=c("TRUE", "FALSE")) + scale_fill_manual("", values=c("TRUE"="#2166ac", "FALSE"="#d1e5f0"), labels=c("TRUE"="Greater in sample", "FALSE"="No greater in sample than env."), breaks=c("TRUE", "FALSE")) + geom_quasirandom( aes(fill=sign), shape=21, bandwidth = 0.1, width=0.3, size=2 ) + scale_y_continuous( expand=c(0,0), breaks=c(-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5), limits=c(-5,5), labels = function(x) scales::comma(10^(abs(x)) * sign(x)) ) + scale_x_discrete( labels=c("healthy"="Healthy", "sarcoidosis"="Sarcoidosis")) + labs( y="Sample - Environment (reads)", x=NULL, title=NULL) + theme_classic(base_size = 8) + theme( axis.text=element_text(color="black"), axis.line=element_line(size=1, color="black", linetype=1), axis.line.x=element_line(size=1, color="black", linetype=1), axis.line.y=element_line(size=1, color="black", linetype=1), panel.grid.minor=element_blank(), legend.position="bottom", legend.direction="vertical") plot(p) ggsave( file.path(opts$figure_fp, "Fig1C_SetA_Cladosporiaceae.pdf"), plot=p, device=cairo_failsafe, family="Helvetica", height=3, width=2.1) })) ``` ```{r a-rank-abundance, eval=FALSE} ## This generates Whittaker and Preston plots but we decided not to include them. invisible(within(a.16s, { browser() rank_abund <- agg %>% mutate(label=SarcoidMicrobiome:::min_rank(., end.rank="Species")) %>% group_by(StudyGroup, SampleType) %>% filter(SampleType %in% c("lymph_node", "paraffin")) %>% mutate(rank = row_number(desc(proportion))) %>% arrange(rank) %>% mutate(relabund = proportion/sum(proportion)) labelling <- rank_abund %>% filter(rank <= 5) whit <- ggplot(rank_abund, aes(x=rank, y=relabund, color=SampleType)) + geom_point(shape=21) + facet_wrap(~ StudyGroup, scales="free") + scale_y_log10() + annotation_logticks(sides = "l") + ggsci::scale_color_npg(labels=c("Lymph node", "Paraffin")) + labs(x="Rank", y="Relative abundance", title="Whittaker plot of relative species abundance in Set A (16S)") plot(whit) ggsave(file.path( opts$figure_fp, sprintf("FigX1_Set%s_%s_WhittakerPlot.pdf", sampleset, datatype)), plot=whit, height=4, width=8) pres <- ggplot(rank_abund, aes(x=log2(relabund), fill=SampleType)) + geom_histogram(binwidth=1, position="dodge", color="black") + facet_wrap(~ StudyGroup) + scale_x_continuous(labels = function(x) sprintf("%.1e", 2^x)) + scale_y_continuous(expand=c(0,0)) + ggsci::scale_fill_npg(name="Sample type", labels=c("Lymph node", "Paraffin")) + labs(x="Relative abundance (log2 scaled)", y="OTUs", title="Preston plot of relative species abundance in Set A (16S)") plot(pres) ggsave(file.path( opts$figure_fp, sprintf("FigX1_Set%s_%s_PrestonPlot.pdf", sampleset, datatype)), plot=pres, height=4, width=8) })) ``` ```{r a-16s-heatmaps} invisible(within(c.vir, { browser() heatmap.data <- agg %>% filter(SampleType %in% c("BAL", "prewash"), !Species %in% tech.contam) %>% SarcoidMicrobiome:::MakeHeatmapData(s, min.samples = 1, min.rank.1 = "Species") %>% mutate(proportion = ifelse(is.na(proportion), 0, proportion)) %>% group_by(SampleType, StudyGroup, MinRank1) %>% summarize(proportion=median(proportion, na.rm=TRUE)) %>% mutate(rank = row_number(desc(proportion))) top.taxa <- (heatmap.data %>% filter(rank <= 10))$MinRank1 heatmap.data <- heatmap.data %>% filter(MinRank1 %in% top.taxa) %>% ungroup %>% mutate( StudyGroup = fct_recode( StudyGroup, "Healthy"="healthy", "Sarcoidosis"="sarcoidosis"), SampleType = fct_recode( SampleType, "Lymph node"="lymph_node", "Paraffin"="paraffin" )) .mat <- reshape2::dcast( heatmap.data, MinRank1 ~ StudyGroup + SampleType, value.var="proportion", fill=0 ) rownames(.mat) <- .mat$MinRank1 .mat$MinRank1 <- NULL minrank_order <- (dist(.mat) %>% hclust(method="centroid"))$order heatmap.data <- heatmap.data %>% mutate(MinRank1 = factor(MinRank1, levels=rownames(.mat)[minrank_order])) heatmap <- SarcoidMicrobiome:::PlotHeatmap( heatmap.data, use.reads=FALSE, x.axis = "SampleType", threshold = .28) + facet_grid(. ~ StudyGroup, space="free", scales="free") + scale_x_discrete(expand=c(0,0)) + # theme_classic(base_size = 12) + theme( text = element_text(color="black"), axis.text.x = element_text(angle=0, hjust=0.5, vjust=0), axis.title = element_blank(), strip.text.x = element_text(angle=0), strip.background = element_rect(fill=NA, color="grey20") ) + ggsci::scale_fill_material("blue-grey", name="Median %", na.value="white", labels=scales::percent_format()) plot(heatmap) # ggsave(file.path( # opts$figure_fp, # sprintf("FigX2_Set%s_%s_SummaryHeatmap.pdf", sampleset, datatype)), # plot=heatmap, height=4, width=5.5) })) ``` ```{r a-16s-topbarcharts} invisible(within(a.16s, { p <- agg %>% ungroup %>% filter( SampleType %in% c("lymph_node", "paraffin"), StudyGroup %in% c("healthy", "sarcoidosis")) %>% MakeTopBarchartData(s, top=6, min.rank="Species") %>% PlotTopBarcharts() + scale_fill_manual("", values=top.barchart.fills) + scale_color_manual("", values=top.barchart.fills) + scale_y_continuous(expand=c(0,0), labels=scales::percent) + labs( title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype)) plot(p) ggsave(file.path( opts$figure_fp, sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)), plot=p, height=4, width=7) })) ``` ```{r a-its-topbarcharts} invisible(within(a.its, { # browser() p <- agg %>% ungroup %>% filter( SampleType %in% c("lymph_node", "paraffin"), StudyGroup %in% c("healthy", "sarcoidosis")) %>% MakeTopBarchartData(s, top=6, min.rank="Species") %>% PlotTopBarcharts() + scale_fill_manual("", values=top.barchart.fills) + scale_color_manual("", values=top.barchart.fills) + scale_y_continuous(expand=c(0,0), labels=scales::percent) + labs( title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype)) plot(p) ggsave(file.path( opts$figure_fp, sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)), plot=p, height=4, width=7) })) ``` ## Fig 2: Set B Barcharts ```{r b-16s-barcharts} invisible(within(b.16s, { agg %>% filter(SampleType %in% c("lymph_node", "water")) %>% mutate(StudyGroup = fct_recode(StudyGroup, "extr_ctrl"="ext_ctrl")) %>% # mutate(SubjectID = ifelse(SampleType == "water", "B00", as.character(SubjectID))) %>% MakeBarchartData() %>% PlotBarchart() %>% SplitBarcharts() %>% SaveBarcharts( file.path(opts$figure_fp, sprintf("Fig2A_Set%s_%s_%%s.pdf", sampleset, datatype)), height=opts$barchart.height/1.5) })) ``` ```{r b-its-barcharts} invisible(within(b.its, { agg %>% group_by(SubjectID) %>% filter(sum(count) > 0) %>% filter(SampleType %in% c("lymph_node", "extr_blank")) %>% mutate(SampleType = fct_recode(SampleType, "water"="extr_blank")) %>% ungroup() %>% mutate( MinRankOrder = SarcoidMicrobiome:::min_rank(., "Order")) %>% mutate( MinRankOrder = forcats::fct_recode( MinRankOrder, Other="Fungi", Other="uncultured fungus (phylum)", Other="Indeterminate")) %>% MakeBarchartData(taxa.level = "MinRankOrder") %>% PlotBarchart() %>% SplitBarcharts() %>% SaveBarcharts( file.path(opts$figure_fp, sprintf("Fig2B_Set%s_%s_%%s.pdf", sampleset, datatype)), height=opts$barchart.height/1.5) })) ``` ```{r b-16s-topbarcharts} invisible(within(b.16s, { # browser() p <- agg %>% ungroup %>% filter( SampleType %in% c("lymph_node", "paraffin"), StudyGroup %in% c("healthy", "sarcoidosis")) %>% MakeTopBarchartData(s, top=6, min.rank="Species") %>% PlotTopBarcharts() + scale_fill_manual("", values=top.barchart.fills) + scale_color_manual("", values=top.barchart.fills) + scale_y_continuous(expand=c(0,0), labels=scales::percent) + labs( title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype)) plot(p) ggsave(file.path( opts$figure_fp, sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)), plot=p, height=4, width=7) })) ``` ```{r b-its-topbarcharts} invisible(within(b.its, { # browser() p <- agg %>% ungroup %>% filter( SampleType %in% c("lymph_node", "paraffin"), StudyGroup %in% c("healthy", "sarcoidosis")) %>% MakeTopBarchartData(s, top=6, min.rank="Species") %>% PlotTopBarcharts() + scale_fill_manual("", values=top.barchart.fills) + scale_color_manual("", values=top.barchart.fills) + scale_y_continuous(expand=c(0,0), labels=scales::percent) + labs( y="Median abundance", title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype)) plot(p) ggsave(file.path( opts$figure_fp, sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)), plot=p, height=4, width=7) })) ``` ## Figure 3: Set C Barcharts ```{r c-16s-barcharts} invisible(within(c.16s, { agg %>% filter(SampleType %in% c("BAL", "prewash")) %>% mutate(SampleType = relevel(SampleType, "prewash")) %>% droplevels() %>% MakeBarchartData() %>% group_by(SubjectID, StudyGroup) %>% complete(SampleType, fill = list(count=0)) %>% PlotBarchart(fill.values = bacterial.colors, x.labs = opts$bal.labels) %>% SplitBarcharts(groups = c("sarcoidosis", "healthy")) %>% SaveBarcharts( file.path(opts$figure_fp, sprintf("Fig3A_Set%s_%s_%%s.pdf", sampleset, datatype))) })) ``` ```{r c-its-barcharts} invisible(within(c.its, { agg %>% filter(SampleType %in% c("BAL", "prewash")) %>% mutate(SampleType = relevel(SampleType, "prewash")) %>% droplevels() %>% MakeBarchartData() %>% group_by(SubjectID, StudyGroup) %>% complete(SampleType, fill = list(count=0)) %>% group_by(SubjectID) %>% filter(sum(count) > 0) %>% PlotBarchart(fill.values = fungal.colors, x.labs = opts$bal.labels) %>% SplitBarcharts(groups = c("sarcoidosis", "healthy")) %>% SaveBarcharts( file.path(opts$figure_fp, sprintf("Fig3B_Set%s_%s_%%s.pdf", sampleset, datatype))) })) ``` ```{r c-virome-barcharts} invisible(within(c.vir, { tech.contam <- c( "Enterobacteria phage M13", "Enterobacteria phage T7", "Enterobacteria phage phiX174 sensu lato","Bacillus phage phi29", "Pseudomonas phage phi6", "Shamonda virus", "Human herpesvirus 7", "Human herpesvirus 6", "Cyprinid herpesvirus 3") agg %>% filter(LibraryType == "Genomiphi") %>% filter(SampleType %in% c("BAL", "prewash")) %>% filter(!Species %in% tech.contam) %>% mutate(SampleType = relevel(SampleType, "prewash")) %>% droplevels() %>% MakeBarchartData(taxa.level = "Family") %>% group_by(SubjectID, StudyGroup) %>% complete(SampleType, fill = list(count=0)) %>% group_by(SubjectID) %>% filter(sum(count) > 0) %>% PlotBarchart(fill.values = NULL, x.labs = opts$bal.labels) %>% SplitBarcharts(groups = c("sarcoidosis", "healthy")) %>% SaveBarcharts( file.path(opts$figure_fp, sprintf("Fig3C_Set%s_%s_%%s.pdf", sampleset, datatype))) })) ``` ```{r c-16s-topbarcharts} invisible(within(c.16s, { # browser() p <- agg %>% ungroup %>% filter( SampleType %in% c("BAL", "prewash"), StudyGroup %in% c("healthy", "sarcoidosis")) %>% MakeTopBarchartData(s, top=6, min.rank="Species") %>% PlotTopBarcharts() + scale_fill_manual("", values=top.barchart.fills) + scale_color_manual("", values=top.barchart.fills) + scale_y_continuous(expand=c(0,0), labels=scales::percent) + labs( y="Median abundance", title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype)) plot(p) ggsave(file.path( opts$figure_fp, sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)), plot=p, height=4, width=7) })) ``` ```{r c-its-topbarcharts} invisible(within(c.its, { # browser() p <- agg %>% ungroup %>% filter( SampleType %in% c("BAL", "prewash"), StudyGroup %in% c("healthy", "sarcoidosis")) %>% MakeTopBarchartData(s, top=6, min.rank="Species") %>% PlotTopBarcharts() + scale_fill_manual("", values=top.barchart.fills) + scale_color_manual("", values=top.barchart.fills) + scale_y_continuous(expand=c(0,0), labels=scales::percent) + labs( title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype)) plot(p) ggsave(file.path( opts$figure_fp, sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)), plot=p, height=4, width=7) })) ``` ```{r c-vir-topbarcharts} invisible(within(c.vir, { # browser() tech.contam <- c( "Enterobacteria phage M13", "Enterobacteria phage T7", "Enterobacteria phage phiX174 sensu lato", "Bacillus phage phi29", "Pseudomonas phage phi6", "Shamonda virus", "Human herpesvirus 7", "Human herpesvirus 6", "Cyprinid herpesvirus 3") # browser() p <- agg %>% ungroup %>% filter( SampleType %in% c("BAL", "prewash"), StudyGroup %in% c("healthy", "sarcoidosis"), !Species %in% tech.contam, Genus != "Phi29virus", LibraryType == "Genomiphi") %>% MakeTopBarchartData(s, top=6, min.rank="Genus") %>% PlotTopBarcharts() + scale_fill_manual("", values=top.barchart.fills) + scale_color_manual("", values=top.barchart.fills) + scale_y_continuous(expand=c(0,0), labels=scales::percent) + labs( title=sprintf("Dominant taxa in Set %s (%s)", sampleset, datatype)) plot(p) ggsave(file.path( opts$figure_fp, sprintf("FigXb_Set%s_%s_Boxplots.pdf", sampleset, datatype)), plot=p, height=4, width=7) })) ``` ## Figure 4: Sets D/E Barcharts ```{r de-16s-barcharts} invisible(within(de.16s, { .s <- distinct(s, Sample, .keep_all = TRUE) %>% select(-SampleID) .agg <- agg %>% group_by(Sample, otu) %>% summarize(count=sum(count)) %>% group_by(Sample) %>% mutate(proportion=count/sum(count)) %>% left_join(.s) %>% left_join(md) %>% rename(SampleID=Sample) %>% mutate(SubjectID=SampleID) %>% droplevels() tmp <- .agg %>% MakeBarchartData() %>% PlotBarchart( x.col="SampleID", x.labs=c("B"="BL", "Sal"="SL"), theme=opts$barchart.theme + theme(axis.text.y=element_text(family="mono"))) %>% SplitBarcharts(groups=c("Sarcoidosis", "Control")) %>% SaveBarcharts( file.path(opts$figure_fp, sprintf("Fig4A_Set%s_%s_%%s.pdf", sampleset, datatype)), height=opts$barchart.height/1.5) })) ``` ```{r de-its-barcharts} invisible(within(de.its, { agg %>% mutate(SampleType = fct_recode(SampleType, "water"="extr_blank")) %>% mutate(SubjectID=SampleID) %>% mutate( MinRankOrder = SarcoidMicrobiome:::min_rank(., "Order")) %>% mutate( MinRankOrder = forcats::fct_recode( MinRankOrder, Other="Fungi", Other="uncultured fungus (phylum)", Other="Indeterminate", Other="Coniochaetales")) %>% MakeBarchartData(taxa.level="MinRankOrder") %>% PlotBarchart( x.col="SubjectID", x.labs=c("Blank"="BL", "Saline"="SL", "Kveim"="KV", "Spleen1"="S1", "Spleen2"="S2", "Spleen3"="S3"), theme=opts$barchart.theme + theme(axis.text.y=element_text(family="mono"))) %>% SplitBarcharts(groups=c("Sarcoidosis", "Control")) %>% SaveBarcharts( file.path(opts$figure_fp, sprintf("Fig4B_Set%s_%s_%%s.pdf", sampleset, datatype)), height=opts$barchart.height/1.5) # browser() })) ``` ```{r de-wgs-barcharts} invisible(within(de.wgs, { agg %>% filter(Order != "Primates") %>% mutate(SubjectID=SampleID) %>% MakeBarchartData() %>% PlotBarchart( x.col="SampleID", x.labs=c("B"="BL", "Sal"="SL"), theme=opts$barchart.theme + theme(axis.text.y=element_text(family="mono"))) %>% SplitBarcharts(groups=c("Sarcoidosis", "Control")) %>% SaveBarcharts( file.path(opts$figure_fp, sprintf("Fig4C_Set%s_%s_%%s.pdf", sampleset, datatype)), height=opts$barchart.height/1.5) })) ``` ## Figure S2: Set A Community Differences ```{r a-16s-ln-pcoa} invisible(within(a.16s, { set.seed(1) .agg <- agg %>% filter( StudyGroup != "extr_ctrl", SampleType %in% c("lymph_node")) %>% mutate(SampleType = fct_recode( SampleType, c("lymph node"="lymph_node"))) %>% mutate(readcount=total) PCoAChunk( .agg, s, tree, rarefy.depth=1000, testing.terms=c("readcount","StudyGroup"), sampleset=sampleset, dataset=datatype, subset="tissue", fig.num="S2A", fig.dir=opts$figure_fp) })) ``` ```{r a-its-ln-pcoa} invisible(within(a.its, { set.seed(1) .agg <- agg %>% filter( count > 0, StudyGroup != "extr_ctrl", SampleType %in% c("lymph_node")) %>% mutate(SampleType = fct_recode(SampleType, c("lymph node"="lymph_node"))) %>% mutate(readcount=total) PCoAChunk( .agg, s, tree, rarefy.depth=900, testing.terms=c("readcount", "StudyGroup"), sampleset=sampleset, dataset=datatype, subset="tissue", fig.num="S2B", fig.dir=opts$figure_fp) })) ``` ```{r a-its-paraffin-pcoa} invisible(within(a.its, { set.seed(1) .agg <- agg %>% filter( count > 0, StudyGroup != "extr_ctrl", SampleType %in% c("paraffin")) %>% mutate(readcount=total) PCoAChunk( .agg, s, tree, rarefy.depth=900, testing.terms=c("readcount", "StudyGroup"), sampleset="A", dataset="ITS", subset="paraffin", fig.num="S2D", fig.dir=opts$figure_fp) })) ``` ```{r a-16s-paraffin-pcoa} invisible(within(a.16s, { set.seed(1) .agg <- agg %>% filter( StudyGroup != "extr_ctrl", SampleType %in% c("paraffin")) %>% mutate(SampleType = fct_recode(SampleType, c("lymph node"="lymph_node"))) %>% mutate(readcount=total) PCoAChunk( .agg, s, tree, rarefy.depth=1000, testing.terms=c("readcount", "StudyGroup"), sampleset=sampleset, dataset=datatype, subset="paraffin", fig.num="S2C", fig.dir=opts$figure_fp) })) ``` ## Figure S3: Set B Community Differences ```{r b-16s-pcoa} invisible(within(b.16s, { # .agg <- agg %>% filter( StudyGroup != "extr_ctrl", SampleType %in% c("lymph_node", "paraffin"), StudyGroup %in% c("sarcoidosis", "healthy")) %>% mutate( SampleType = fct_recode(SampleType, c("lymph node"="lymph_node"))) %>% mutate(readcount=total) PCoAChunk( .agg, s, tree, rarefy.depth=900, testing.terms=c("readcount", "StudyGroup"), sampleset=sampleset, dataset=datatype, subset="tissue", fig.num="S3A", fig.dir=opts$figure_fp) })) ``` ```{r b-its-pcoa} invisible(within(b.its, { # .agg <- agg %>% filter( StudyGroup != "extr_ctrl", SampleType %in% c("lymph_node", "paraffin"), StudyGroup %in% c("sarcoidosis", "healthy")) %>% mutate( SampleType = fct_recode(SampleType, c("lymph node"="lymph_node"))) %>% mutate(readcount=total) PCoAChunk( .agg, s, tree, rarefy.depth=900, testing.terms=c("readcount", "StudyGroup"), sampleset=sampleset, dataset=datatype, subset="tissue", fig.num="S3B", fig.dir=opts$figure_fp) })) ``` ## Figure S4: Set C Community Differences ```{r c-16s-bal-pcoa} invisible(within(c.16s, { .agg <- agg %>% filter( StudyGroup != "extr_ctrl", SampleType %in% c("BAL")) %>% mutate(readcount=total) PCoAChunk( .agg, s, tree, rarefy.depth=1000, testing.terms=c("StudyGroup"), sampleset=sampleset, dataset=datatype, subset="BAL", fig.num="S4A", fig.dir=opts$figure_fp) })) ``` ```{r c-16s-pw-pcoa} invisible(within(c.16s, { .agg <- agg %>% filter( StudyGroup != "extr_ctrl", SampleType %in% c("prewash")) %>% mutate(readcount=total) PCoAChunk( .agg, s, tree, rarefy.depth=1000, testing.terms=c("StudyGroup"), sampleset=sampleset, dataset=datatype, subset="prewash", fig.num="S4B", fig.dir=opts$figure_fp) })) ``` ## Figure S5: Viral populations in prewash ```{r c-vir-prewash} invisible(within(c.vir, { tech.contam <- c( "Enterobacteria phage M13", "Enterobacteria phage T7", "Enterobacteria phage phiX174 sensu lato","Bacillus phage phi29", "Bacillus virus phi29", "Pseudomonas phage phi6", "Shamonda virus", "Human herpesvirus 7", "Human herpesvirus 6", "Cyprinid herpesvirus 3") p <- agg %>% filter(SampleType == "prewash", LibraryType == "Genomiphi") %>% filter(!Species %in% tech.contam) %>% MakeHeatmapData(s, "Species") %>% PlotHeatmap(use.reads = FALSE) %>% eclectic::make_square() + theme( strip.text.x=element_text(angle=0), # strip.background=element_blank(), axis.text.x=element_blank(), axis.title=element_blank(), axis.ticks.x=element_blank() ) plot(p) ggsave( file.path(opts$figure_fp, "FigS5_SetC_virome_prewash.pdf"), p, width=7, height=9) })) ``` ## Table S1: Sample Metadata ```{r} combined <- do.call(rbind, lapply(list( a.16s, a.its, b.16s, b.its, c.16s, c.its, c.vir, de.16s, de.its, de.wgs), StandardizeMetadata)) write.table( combined, file=file.path(opts$table_fp, "all_samples.tsv"), sep="\t", row.names = FALSE, quote = FALSE) ``` ### SRA data ```{r biosample-tables} WriteSRATables <- function(ds) { biosample <- MakeBioSample(ds) sra <- MakeSRAMetadata(ds) write.table( biosample, file=file.path( opts$table_fp, sprintf("biosample_%s_%s.tsv", ds$sampleset, ds$datatype)), sep="\t", row.names = FALSE, quote=FALSE) write.table( sra, file=file.path( opts$table_fp, sprintf("sra_%s_%s.tsv", ds$sampleset, ds$datatype)), sep="\t", row.names = FALSE, quote=FALSE) } lapply(list( a.16s, a.its, b.16s, b.its, c.16s, c.its, c.vir, de.16s, de.its, de.wgs), WriteSRATables) ```