library(gdata) library(colorRamps) library(RColorBrewer) library(pheatmap) options(warn=-1) ## Help Documentation describe = function(obj) { if ('help' %in% names(attributes(obj))) { writeLines(attr(obj, 'help')) } } attr(describe, 'help') = " This function prints the contents of the 'help' attribute of any R object. It is meant to provide help documentation in the same vein as Docstrings in Python. " error.bar = function(x, y, upper, lower=upper, arrow.length=0.05, ...) { if(length(x) != length(y) | length(y) !=length(lower) | length(lower) != length(upper)) { print('x') print(length(x)) print('y') print(length(y)) print('upper') print(length(upper)) stop("vectors must be same length") } arrows(x, y+upper, x, y-lower, angle=90, code=3, length=arrow.length, ...) } attr(error.bar, 'help') = " This function adds error bars to plots. Parameters: x: A numeric vector of X values. y: A numeric vector of Y values. upper: A numeric vector containing the standard errors for the Y values. Returns: Plots arrows (error bars). " ## Format the data for boxplots flow_boxplot_data = function(flow_df, lines, tissue, flow_vars, tp, line_colors=NULL, mocks_only=FALSE, data_type=1) { ## Initialize lists bdat = list() pdat = list() ## Get Mock data n_mocks = 0 mock_lines = c() for (l in lines) { label = paste(l, 'MOCK', sep='_') d = flow_df[flow_df$UW_Line==l & tolower(flow_df$Virus)=='mock' & flow_df$Tissue==tissue & flow_df$Timepoint %in% as.numeric(tp), flow_vars] if (!all(is.na(d))) { bdat[[label]] = d n_mocks = n_mocks + 1 mock_lines = c(mock_lines, l) } tmp_list = list() for (t in tp) { d = flow_df[flow_df$UW_Line==l & tolower(flow_df$Virus)=='mock' & flow_df$Tissue==tissue & flow_df$Timepoint == as.numeric(t), flow_vars] if (!all(is.na(d))) { tmp_list[[t]] = d } } if (length(tmp_list) > 0) { pdat[[label]] = tmp_list } } ## Get WNV data n_wnv = 0 wnv_lines = c() if (!mocks_only) { for (l in lines) { label = paste(l, 'WNV', sep='_') d = flow_df[flow_df$UW_Line==l & tolower(flow_df$Virus)=='wnv' & flow_df$Tissue==tissue & flow_df$Timepoint %in% as.numeric(tp), flow_vars] if (!all(is.na(d))) { bdat[[label]] = d n_wnv = n_wnv + 1 wnv_lines = c(wnv_lines, l) } tmp_list = list() for (t in tp) { d = flow_df[flow_df$UW_Line==l & tolower(flow_df$Virus)=='wnv' & flow_df$Tissue==tissue & flow_df$Timepoint == as.numeric(t), flow_vars] if (!all(is.na(d))) { tmp_list[[t]] = d } } if (length(tmp_list) > 0) { pdat[[label]] = tmp_list } } } ## Create boxplot colors union_lines = union(mock_lines, wnv_lines) n_colors = length(union_lines) l_colors = colorRampPalette(brewer.pal(9,'Set1'))(n_colors) if (!mocks_only) { mock_colors = c() for (l in mock_lines) { mock_colors = c(mock_colors, l_colors[which(union_lines==l)]) } wnv_colors = c() for (l in wnv_lines) { wnv_colors = c(wnv_colors, l_colors[which(union_lines==l)]) } line_colors = c(mock_colors, wnv_colors) } else { line_colors = c(l_colors[1:n_mocks]) } ## DEBUGGING / TESTING #print(line_colors) ## Determine data type (e.g. percentages vs. absolute counts) data_type = as.numeric(data_type) if (length(grep("^ics_.*", flow_vars)) > 0) { data_type = data_type + 2 } ## Create plot title with heritability annotation #icc_var1 = paste(tissue, '_mock_icc_gelman_hill', sep="") #icc_var2 = paste(tissue, '_icc_gelman_hill', sep="") #title = paste(flow_vars, " (ICC - Mock = ", round(flow_heritability[flow_vars, icc_var1], digits=3), "; ICC - All = ", round(flow_heritability[flow_vars, icc_var2], digits=3), ")", sep="") title = flow_vars ## Return heatmap matrix return(list(bdat_list=bdat, pdat_list=pdat, colors=line_colors, num_mocks=n_mocks, time_points=tp, dtype=data_type, title=title)) } attr(flow_boxplot_data, 'help') = " This function aggregates the data needed for creating boxplots of the flow cytometry data. Parameters: flow_df: The dataframe containing the flow data. lines: A numeric vector containing the mouse lines that should be plotted. tissue: A string indicating the tissue (e.g. 'brain' or 'spleen'). flow_vars: A string indicating the flow variable to be plotted. tp: A character vector containing the time points to be included. line_colors: A vector of colors (default=NULL; colors will be determined automatically). mocks_only: Logical value indicating whether mocks only will be plotted. data_type: A number indicating whether percentages (1) or absolute cell counts (2) will be plotted. Returns: A list containing the data to be plotted; input for the flow_boxplots() function. " ## Boxplots flow_boxplots = function(boxplot_data, ...) { tp_symbols = c("7"=21, "12"=22, "21"=23, "28"=24) if (boxplot_data$dtype == 1) { ylab="Percent" } else if (boxplot_data$dtype == 2) { ylab="Cell Count" } else if (boxplot_data$dtype == 3) { ylab="Percent Ratio" } else { ylab="Cell Count Ratio" } if (is.na(boxplot_data$y_min) | is.na(boxplot_data$y_max)) { if (boxplot_data$dtype == 1) { ylim = c(0,105) } else { ylim = NULL } } else { ylim = c(boxplot_data$y_min, boxplot_data$y_max) } if (boxplot_data$rm_outliers) { if (boxplot_data$dtype == 1) { boxplot(boxplot_data$bdat_list, col=boxplot_data$colors, xaxt = "n", xlab = "", ylab=ylab, main=boxplot_data$title, outline=FALSE, ylim=ylim) if (boxplot_data$show_data) { for (t in boxplot_data$time_points) { d = lapply(boxplot_data$pdat_list, function(x){x[[t]]}) stripchart(d, method='jitter', pch=tp_symbols[t], lwd=2, cex=1.25, vertical=T, add=T) } } } else { boxplot(boxplot_data$bdat_list, col=boxplot_data$colors, xaxt = "n", xlab = "", ylab=ylab, main=boxplot_data$title, outline=FALSE, ylim=ylim) if (boxplot_data$show_data) { for (t in boxplot_data$time_points) { d = lapply(boxplot_data$pdat_list, function(x){x[[t]]}) stripchart(d, method='jitter', pch=tp_symbols[t], lwd=2, cex=1.25, vertical=T, add=T) } } } } else { if (boxplot_data$dtype == 1) { boxplot(boxplot_data$bdat_list, col=boxplot_data$colors, xaxt = "n", xlab = "", ylab=ylab, main=boxplot_data$title, pch=8, cex=1.5, ylim=ylim) if (boxplot_data$show_data) { for (t in boxplot_data$time_points) { d = lapply(boxplot_data$pdat_list, function(x){x[[t]]}) stripchart(d, method='jitter', pch=tp_symbols[t], lwd=2, cex=1.25, vertical=T, add=T) } } } else { boxplot(boxplot_data$bdat_list, col=boxplot_data$colors, xaxt = "n", xlab = "", ylab=ylab, main=boxplot_data$title, pch=8, cex=1.5, ylim=ylim) if (boxplot_data$show_data) { for (t in boxplot_data$time_points) { d = lapply(boxplot_data$pdat_list, function(x){x[[t]]}) stripchart(d, method='jitter', pch=tp_symbols[t], lwd=2, cex=1.25, vertical=T, add=T) } } } } if (boxplot_data$show_data) { legend('topleft', legend=c('D7', 'D12', 'D21', 'D28'), pch=tp_symbols, pt.lwd=2, pt.cex=1.25, ncol=4) } labels = names(boxplot_data$bdat_list) axis(1, at=1:length(labels), labels=FALSE) y_interval = (par("usr")[4] - par("usr")[3])/675 y_label_pos = par("usr")[3]-(80*y_interval) text(x=seq_along(labels), y=y_label_pos, srt=270, adj=1, labels=labels, xpd=TRUE, ...) abline(v=boxplot_data$num_mocks+0.5, lty=3) return(NULL) } attr(flow_boxplots, 'help') = " This function plots the data returned by flow_boxplot_data(). Parameters: boxplot_data: This should be a list combining the value returned by flow_boxplot_data() and additional options. Returns: NULL Examples: boxplot_data = flow_boxplot_data(flow_full, c(7,8,9), 'brain', 'treg_T_regs', c('7','12','21','28')) opts = list(rm_outliers=F, show_data=F, y_min=0, y_max=100) flow_boxplots(c(boxplot_data, opts)) " flow_multiline_plot_data = function(flow_df, uw_lines, tissue, flow_vars, plot_type, FUN=NULL) { ## Get time points ## Only plots at time points where all variables exist if (plot_type == 1) { mock_lines = c() mock_tp = c() for (uw_line in uw_lines) { tp_existing = flow_df$Timepoint[flow_df$UW_Line==uw_line & flow_df$Virus == 'Mock' & flow_df$Tissue==tissue & !is.na(flow_df[,flow_vars])] if (length(tp_existing) > 0) { mock_tp = c(tp_existing, mock_tp) mock_lines = c(mock_lines, uw_line) } } mock_tp = sort(unique(mock_tp)) wnv_lines = c() tp = c() for (uw_line in uw_lines) { tp_existing = flow_df$Timepoint[flow_df$UW_Line==uw_line & flow_df$Virus == 'WNV' & flow_df$Tissue==tissue & !is.na(flow_df[,flow_vars])] if (length(tp_existing) > 0) { tp = c(tp_existing, tp) wnv_lines = c(wnv_lines, uw_line) } } tp = sort(unique(tp)) uw_lines = sort(as.numeric(unique(c(mock_lines, wnv_lines)))) } else { new_vars = c() mock_tp = c() for (flow_var in flow_vars) { tp_existing = flow_df$Timepoint[flow_df$UW_Line==uw_lines & flow_df$Virus == 'Mock' & flow_df$Tissue==tissue & !is.na(flow_df[,flow_var])] if (length(tp_existing) > 0) { mock_tp = c(tp_existing, mock_tp) new_vars = c(new_vars, flow_var) } } mock_tp = sort(unique(mock_tp)) tp = c() for (flow_var in flow_vars) { tp_existing = flow_df$Timepoint[flow_df$UW_Line==uw_lines & flow_df$Virus == 'WNV' & flow_df$Tissue==tissue & !is.na(flow_df[,flow_var])] if (length(tp_existing) > 0) { tp = c(tp_existing, tp) new_vars = c(new_vars, flow_var) } } tp = sort(unique(tp)) flow_vars = unique(new_vars) } #print(mock_tp) #print(tp) #print(uw_lines) #print(flow_vars) if (length(mock_tp)+length(tp) == 0) { return(NULL) } ## Create data matrices if (plot_type == 1) { x = matrix(NA, nrow=(length(mock_tp)+length(tp)), ncol=length(uw_lines)) y = matrix(NA, nrow=(length(mock_tp)+length(tp)), ncol=length(uw_lines)) e = matrix(NA, nrow=(length(mock_tp)+length(tp)), ncol=length(uw_lines)) l_mat = matrix(NA, nrow=(length(mock_tp)+length(tp)), ncol=length(uw_lines)) } else { x = matrix(NA, nrow=(length(mock_tp)+length(tp)), ncol=length(flow_vars)) y = matrix(NA, nrow=(length(mock_tp)+length(tp)), ncol=length(flow_vars)) e = matrix(NA, nrow=(length(mock_tp)+length(tp)), ncol=length(flow_vars)) l_mat = matrix(NA, nrow=(length(mock_tp)+length(tp)), ncol=length(flow_vars)) } ## Get mock data if (length(mock_tp) > 0) { if (plot_type == 1) { i = 1 end = length(mock_tp) for (uw_line in uw_lines) { if (uw_line %in% mock_lines) { ## Get means for each group m = aggregate(flow_df[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='Mock', flow_vars], list(flow_df$Timepoint[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='Mock']), mean, na.rm=T) colnames(m) = c('group','y') for (t in setdiff(mock_tp, m$group)) { m = rbind(m, c(t, NA)) } m = m[match(mock_tp,m[,1]),] m$x = c(1:end) #print(m) ## Get SD for each group s = aggregate(flow_df[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='Mock', flow_vars], list(flow_df$Timepoint[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='Mock']), sd, na.rm=T) colnames(s) = c('group','y') for (t in setdiff(mock_tp, s$group)) { s = rbind(s, c(t, NA)) } s = s[match(mock_tp,s[,1]),] ## Get size of each group l = aggregate(flow_df[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='Mock', flow_vars], list(flow_df$Timepoint[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='Mock']), function(x){sum(!is.na(x))}) colnames(l) = c('group','len') for (t in setdiff(mock_tp, l$group)) { l = rbind(l, c(t, NA)) } l = l[match(mock_tp,l[,1]),] x[1:length(mock_tp),i] = m$x y[1:length(mock_tp),i] = m$y e[1:length(mock_tp),i] = s$y l_mat[1:length(mock_tp),i] = l$len } else { x[1:length(mock_tp),i] = NA y[1:length(mock_tp),i] = NA e[1:length(mock_tp),i] = NA l_mat[1:length(mock_tp),i] = NA } i = i + 1 } } else { i = 1 end = length(mock_tp) for (flow_var in flow_vars) { #print(flow_var) ## Get means for each group m = aggregate(flow_df[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='Mock', flow_var], list(flow_df$Timepoint[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='Mock']), mean, na.rm=T) #print(m) colnames(m) = c('group','y') for (t in setdiff(mock_tp, m$group)) { m = rbind(m, c(t, NA)) } m = m[match(mock_tp,m[,1]),] m$x = c(1:end) ## Get SD for each group s = aggregate(flow_df[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='Mock', flow_var], list(flow_df$Timepoint[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='Mock']), sd, na.rm=T) colnames(s) = c('group','y') for (t in setdiff(mock_tp, s$group)) { s = rbind(s, c(t, NA)) } s = s[match(mock_tp,s[,1]),] ## Get size of each group l = aggregate(flow_df[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='Mock', flow_var], list(flow_df$Timepoint[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='Mock']), function(x){sum(!is.na(x))}) colnames(l) = c('group','len') for (t in setdiff(mock_tp, l$group)) { l = rbind(l, c(t, NA)) } l = l[match(mock_tp,l[,1]),] x[1:length(mock_tp),i] = m$x y[1:length(mock_tp),i] = m$y e[1:length(mock_tp),i] = s$y l_mat[1:length(mock_tp),i] = l$len i = i + 1 } } } #print(mock_tp) #print(tp) #print(x) #print(y) # Get WNV data if (length(tp) > 0) { if (plot_type == 1) { i = 1 start = length(mock_tp)+1 end = length(mock_tp)+length(tp) for (uw_line in uw_lines) { if (uw_line %in% wnv_lines) { ## Get means for each group m = aggregate(flow_df[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='WNV', flow_vars], list(flow_df$Timepoint[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='WNV']), mean, na.rm=T) colnames(m) = c('group','y') for (t in setdiff(tp, m$group)) { m = rbind(m, c(t, NA)) } m = m[match(tp,m[,1]),] m$x = c(start:end) ## Get SD for each group s = aggregate(flow_df[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='WNV', flow_vars], list(flow_df$Timepoint[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='WNV']), sd, na.rm=T) colnames(s) = c('group','y') for (t in setdiff(tp, s$group)) { s = rbind(s, c(t, NA)) } s = s[match(tp,s[,1]),] ## Get size of each group l = aggregate(flow_df[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='WNV', flow_vars], list(flow_df$Timepoint[flow_df$UW_Line==uw_line & flow_df$Tissue==tissue & flow_df$Virus=='WNV']), function(x){sum(!is.na(x))}) colnames(l) = c('group','len') for (t in setdiff(tp, l$group)) { l = rbind(l, c(t, NA)) } l = l[match(tp,l[,1]),] x[start:end,i] = m$x y[start:end,i] = m$y e[start:end,i] = s$y l_mat[start:end,i] = l$len } else { x[start:end,i] = NA y[start:end,i] = NA e[start:end,i] = NA l_mat[start:end,i] = NA } i = i + 1 } } else { i = 1 start = length(mock_tp)+1 end = length(mock_tp)+length(tp) for (flow_var in flow_vars) { ## Get means for each group m = aggregate(flow_df[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='WNV', flow_var], list(flow_df$Timepoint[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='WNV']), mean, na.rm=T) colnames(m) = c('group','y') for (t in setdiff(tp, m$group)) { m = rbind(m, c(t, NA)) } m = m[match(tp,m[,1]),] m$x = c(start:end) ## Get SD for each group s = aggregate(flow_df[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='WNV', flow_var], list(flow_df$Timepoint[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='WNV']), sd, na.rm=T) colnames(s) = c('group','y') for (t in setdiff(tp, s$group)) { s = rbind(s, c(t, NA)) } s = s[match(tp,s[,1]),] ## Get size of each group l = aggregate(flow_df[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='WNV', flow_var], list(flow_df$Timepoint[flow_df$UW_Line==uw_lines & flow_df$Tissue==tissue & flow_df$Virus=='WNV']), function(x){sum(!is.na(x))}) colnames(l) = c('group','len') for (t in setdiff(tp, l$group)) { l = rbind(l, c(t, NA)) } l = l[match(tp,l[,1]),] x[start:end,i] = m$x y[start:end,i] = m$y e[start:end,i] = s$y l_mat[start:end,i] = l$len i = i + 1 } } } #print(x) #print(y) if (!is.null(FUN)) { y = FUN(y) e = FUN(e) } ## return data list labels = c() if (length(mock_tp) > 0) { labels = paste(mock_tp, 'M', sep='') } labels = c(labels, as.character(tp)) if (plot_type == 1) { return(list(x=x, y=y, e=e, l=l_mat, labels=labels, vars=uw_lines, num_vars=length(uw_lines), plot_type=plot_type, flow_var=flow_vars)) } else { return(list(x=x, y=y, e=e, l=l_mat, labels=labels, vars=flow_vars, num_vars=length(flow_vars), plot_type=plot_type, uw_line=uw_lines)) } } attr(flow_multiline_plot_data, 'help') = " This function aggregates the data needed for creating time-series plots of the flow cytometry data. Parameters: flow_df: The dataframe containing the flow data. uw_lines: A numeric vector containing the mouse lines to be plotted. tissue: A string indicating the tissue (e.g. 'brain' or 'spleen'). flow_vars: A character vector containing the variables to be plotted. plot_type: A number indicating whether the plot will compare lines (1) or compare variables (2). FUN: A function that can be applied to transform the data (default=NULL). Returns: A list containing the data to be plotted; input for the flow_multiline_plots() function. " flow_multiline_plots = function(lineplot_data) { ## plot points and SE d_type = as.numeric(lineplot_data$data_type) if (length(grep("^ics_.*", lineplot_data$vars)) > 0) { d_type = d_type + 2 } if (d_type == 1) { ylab="Percent" } else if (d_type == 2) { ylab="Cell Count" } else if (d_type == 3) { ylab="Percent Ratio" } else { ylab="Cell Count Ratio" } l_colors = colorRampPalette(brewer.pal(9,'Set1'))(lineplot_data$num_vars) if (is.na(lineplot_data$y_min) | is.na(lineplot_data$y_max)) { if (d_type == 1) { ylim = c(0,105) } else { ylim = NULL } } else { ylim = c(lineplot_data$y_min, lineplot_data$y_max) } if (lineplot_data$plot_type == 1) { title = lineplot_data$flow_var n_cols = lineplot_data$num_vars/2 + lineplot_data$num_vars%%2 } else { title = paste("UW Line ", lineplot_data$uw_line, sep='') n_cols = 1 } matplot(lineplot_data$x, lineplot_data$y, type="b", pch=19, lty=1, lwd=4, col=l_colors, ylim=ylim, xaxt="n", ylab=ylab, xlab="Time Point", main=title) #matplot(lineplot_data$x, lineplot_data$y, type="p", pch=19, col=l_colors, ylim=ylim, xaxt="n", ylab=ylab, xlab="Time Point", add=T) axis(1, at=lineplot_data$x[,1], labels=lineplot_data$labels) error.bar(lineplot_data$x, lineplot_data$y, lineplot_data$e/sqrt(lineplot_data$l), lwd=2, col=matrix(l_colors, nrow=length(lineplot_data$labels), ncol=lineplot_data$num_vars, byrow=T)) legend("topleft", legend=lineplot_data$vars, col=l_colors, lty=1, lwd=4, ncol=n_cols) return(NULL) } attr(flow_multiline_plots, 'help') = " This function plots the data returned by flow_multiline_plot_data(). Parameters: lineplot_data: This should be a list combining the value returned by flow_multiline_plot_data() and additional options. Returns: NULL Examples: lineplot_data = flow_multiline_plot_data(flow_full, c(7,8,9), 'brain', 'treg_T_regs', 1) opts = list(data_type=1, y_min=0, y_max=60) flow_multiline_plots(c(lineplot_data, opts)) " flow_heatmap_data = function(flow_df, lines, line_labels=NULL, tissue, flow_vars, var_labels=NULL, tp, herit_df=NULL, line_colors=NULL, mocks_only=FALSE, collapse_mocks=FALSE, no_cluster=FALSE, cluster_all=FALSE, annotations=TRUE) { if (cluster_all) { if (mocks_only) { x = flow_df[flow_df$Tissue==tissue & tolower(flow_df$Virus)=='mock' & flow_df$Timepoint %in% tp,] if (collapse_mocks) { x$Timepoint[tolower(x$Virus)=='mock'] = 0 } } else { x = flow_df[flow_df$Tissue==tissue,] if (collapse_mocks) { x$Timepoint[tolower(x$Virus)=='mock'] = 0 x = x[x$Timepoint %in% c(0, tp), ] } else { x = x[x$Timepoint %in% tp,] } } y = aggregate(x[,flow_vars], list(x$UW_Line, x$Virus, x$Timepoint), mean, na.rm=T) colnames(y) = c('line', 'virus', 'tp', flow_vars) y = y[order(y$line, y$virus, y$tp),] rownames(y) = paste(y$line, y$virus, y$tp, sep='_') num_samples = dim(y)[1] var_indices = apply(y[,flow_vars], 2, function(x){sum(!is.na(x)) > num_samples*0.5}) flow_vars_clust = flow_vars[var_indices] dist_matrix = dist(t(as.matrix(y[,flow_vars_clust]))) } else { dist_matrix = "euclidean" } if (mocks_only) { x = flow_df[flow_df$UW_Line %in% lines & flow_df$Tissue==tissue & tolower(flow_df$Virus)=='mock' & flow_df$Timepoint %in% tp,] if (collapse_mocks) { x$Timepoint[tolower(x$Virus)=='mock'] = 0 } } else { x = flow_df[flow_df$UW_Line %in% lines & flow_df$Tissue==tissue,] if (collapse_mocks) { x$Timepoint[tolower(x$Virus)=='mock'] = 0 x = x[x$Timepoint %in% c(0, tp), ] } else { x = x[x$Timepoint %in% tp, ] } } y = aggregate(x[,flow_vars], list(x$UW_Line, x$Virus, x$Timepoint), mean, na.rm=T) colnames(y) = c('line', 'virus', 'tp', flow_vars) y$line = factor(y$line , levels = lines) y = y[order(y$line, y$virus, y$tp),] rownames(y) = paste(y$line, y$virus, y$tp, sep='_') blocks = c() for (l in lines) {blocks = c(blocks, max(which(y$line==l)))} ## Column annotations col_annot = y[,c('virus', 'line'), drop=F] col_annot$virus = as.factor(col_annot$virus) levels(col_annot$virus)[levels(col_annot$virus)=='Mock'] = 'M' levels(col_annot$virus)[levels(col_annot$virus)=='WNV'] = 'V' if (!is.null(line_labels)) { line_labels = factor(line_labels, levels=line_labels) col_annot$line = line_labels[col_annot$line] } else { col_annot$line = as.factor(col_annot$line) } #annot_colors = list(line=colorRampPalette(brewer.pal(9,'Set1'))(length(unique(y$line))), virus=c(M='gray', V='aquamarine')) annot_colors = list(virus=c(M='gray', V='white')) if (is.null(line_colors)) { #line_colors = sample(colorRampPalette(brewer.pal(9,'Set1'))(length(levels(col_annot$line))*3), length(levels(col_annot$line))) #line_colors = sample(primary.colors(length(levels(col_annot$line))*3), length(levels(col_annot$line))) #line_colors = c("#80FF00","#000000","#008080","#00FFFF","#800000","#FF00FF","#FF0000","#FF8000","#80FF80","#0080FF") line_colors = colorRampPalette(brewer.pal(9,'Set1'))(length(unique(y$line))) #line_colors = primary.colors(length(levels(col_annot$line))) } names(line_colors) = levels(col_annot$line) annot_colors[['line']] = line_colors ## Row and column labels num_samples = dim(y)[1] if (!no_cluster & !cluster_all) { var_indices = apply(y[,flow_vars], 2, function(x){sum(!is.na(x)) > num_samples*0.5}) flow_vars = flow_vars[var_indices] } else if (cluster_all) { flow_vars = flow_vars_clust } #print(flow_vars) if (!is.null(labels)) { labels_row = var_labels } else { labels_row=gsub("_count","", flow_vars) labels_row=gsub("Lymphocytes","Lymph", labels_row) } labels_col = paste(y$virus, y$tp, sep='_') labels_col = gsub("WNV_", " ", labels_col) if (collapse_mocks) { labels_col = gsub("Mock_.*", " M", labels_col) } else { labels_col = gsub("Mock_", " M", labels_col) } labels_col = gsub("$", " ", labels_col) ## Row annotations if (annotations & !is.null(herit_df)) { icc_var1 = paste(tissue, '_mock_icc_gelman_hill', sep="") icc_var2 = paste(tissue, '_icc_gelman_hill', sep="") row_annot = data.frame(ICC_mock=herit_df[flow_vars, icc_var1], ICC_all=herit_df[flow_vars, icc_var2]) rownames(row_annot) = flow_vars annot_colors[['ICC_mock']] = colorRampPalette(brewer.pal(9,'YlGn'))(20) annot_colors[['ICC_all']] = colorRampPalette(brewer.pal(9,'YlGn'))(20) } else { row_annot = NULL } ## Return heatmap matrix return(list(mat=t(as.matrix(y[,flow_vars])), cluster_rows=!no_cluster, dist_mat=dist_matrix, labels_rows=labels_row, labels_col=labels_col, col_annot=col_annot, row_annot=row_annot, blocks=blocks, annot_colors=annot_colors)) } attr(flow_heatmap_data, 'help') = " This function aggregates the data needed for creating heatmaps of the flow cytometry data. Parameters: flow_df: The dataframe containing the flow data. lines: A numeric vector containing the lines to be plotted. line_labels: Alternate labels for the lines tissue: A string indicating the tissue (e.g. 'brain' or 'spleen'). flow_vars: A character vector containing the variables to be plotted. var_labels: Alternate labels for the flow variables tp: A character vector containing the timepoints to be plotted. herit_df: The dataframe containing the heritability estimates (default=NULL); line_colors: A vector of color values (default=NULL; colors will be determined automatically). mocks_only: A logical indicating whether mocks only should be plotted. collapse_mocks: A logical indicating whether to combine the two mock timepoints. no_cluster: A logical indicating whether variables should be clustered. cluster_all: A logical indicating whether the variables should be clustered using all the data. annotations: A logical indicating whether annotations should be added to the heatmap (default=TRUE). Returns: A list containing the data to be plotted; input for the flow_heatmap_plot() function. " weight_percents = c('D0_Percentage','D1_Percentage','D2_Percentage', 'D3_Percentage','D4_Percentage','D5_Percentage','D6_Percentage', 'D7_Percentage','D8_Percentage','D9_Percentage','D10_Percentage', 'D11_Percentage','D12_Percentage','D13_Percentage','D14_Percentage', 'D15_Percentage','D16_Percentage','D17_Percentage','D18_Percentage', 'D19_Percentage','D20_Percentage','D21_Percentage','D22_Percentage', 'D23_Percentage','D24_Percentage','D25_Percentage','D26_Percentage', 'D27_Percentage','D28_Percentage') cs_columns = c('D0','D1','D2','D3','D4','D5','D6','D7','D8','D9','D10', 'D11','D12','D13','D14','D15','D16','D17','D18','D19','D20','D21', 'D22','D23','D24','D25','D26','D27','D28') flow_heatmap_plot = function(hm_data, weights_df=NULL, clinical_df=NULL, weight_cols=weight_percents, cs_cols=cs_columns, collapse_mocks=F, annotations=TRUE, ...) { ## Add weights to heatmap if (annotations & !is.null(weights_df) & !is.null(clinical_df)) { if (collapse_mocks) { weights_df$Timepoint[tolower(weights_df$Virus)=='mock'] = 0 } y_weights = aggregate(weights_df[, weight_cols], list(weights_df$UW_Line, weights_df$Virus, weights_df$Timepoint), mean, na.rm=T) y_weights$weight_change = NA for (i in 1:dim(y_weights)[1]) { w = y_weights[i,weight_cols] w = w[!is.nan(unlist(w)) & !is.na(unlist(w))] if (length(w) > 0) { y_weights$weight_change[i] = w[length(w)] } } colnames(y_weights) = c('line','virus','tp',weight_cols, 'weight_change') y_weights$tp[y_weights$tp=='d7'] = '7' y_weights$tp[y_weights$tp=='d12'] = '12' y_weights$tp[y_weights$tp=='d21'] = '21' y_weights$tp[y_weights$tp=='d28'] = '28' y_weights$tp[y_weights$tp=='d28m'] = '28' ## DEBUGGING / TESTING #print(y_weights) annot_colnames = colnames(hm_data$col_annot) hm_data$col_annot$weight_loss = NA hm_data$col_annot = hm_data$col_annot[,c('weight_loss', annot_colnames)] for (i in 1:dim(hm_data$col_annot)[1]) { id_vector = unlist(strsplit(rownames(hm_data$col_annot)[i], "_")) w_change = unlist(y_weights$weight_change[y_weights$line==as.numeric(id_vector[1]) & tolower(y_weights$virus)==tolower(id_vector[2]) & y_weights$tp==id_vector[3]]) if (!is.null(w_change) & length(w_change)>0) { hm_data$col_annot$weight_loss[i] = -w_change } else { hm_data$col_annot$weight_loss[i] = NA } } ## DEBUGGING / TESTING #print(hm_data$col_annot) if (all(is.na(hm_data$col_annot$weight_loss))) { hm_data$col_annot = hm_data$col_annot[,!colnames(hm_data$col_annot) %in% c('weight_loss')] } else { hm_data$annot_colors[['weight_loss']] = colorRampPalette(brewer.pal(9,'RdYlBu'))(20) } ## Add clinical score to heatmap if (collapse_mocks) { clinical_df$Timepoint[tolower(clinical_df$Virus)=='mock'] = 0 } y_cs = aggregate(clinical_df[, cs_cols], list(clinical_df$UW_Line, clinical_df$Virus, clinical_df$Timepoint), max, na.rm=T) y_cs$clinical_score = NA for (i in 1:dim(y_cs)[1]) { cs = y_cs[i,cs_cols] cs = cs[!is.nan(unlist(cs)) & !is.na(unlist(cs)) & is.finite(unlist(cs))] if (length(cs) > 0) { y_cs$clinical_score[i] = max(cs, na.rm=T) } } colnames(y_cs) = c('line','virus','tp',cs_cols, 'clinical_score') y_cs$tp[y_cs$tp=='d7'] = '7' y_cs$tp[y_cs$tp=='d12'] = '12' y_cs$tp[y_cs$tp=='d21'] = '21' y_cs$tp[y_cs$tp=='d28'] = '28' y_cs$tp[y_cs$tp=='d28m'] = '28' ## DEBUGGING / TESTING #print(y_cs) annot_colnames = colnames(hm_data$col_annot) hm_data$col_annot$clinical_score = NA hm_data$col_annot = hm_data$col_annot[,c('clinical_score', annot_colnames)] for (i in 1:dim(hm_data$col_annot)[1]) { id_vector = unlist(strsplit(rownames(hm_data$col_annot)[i], "_")) cs = unlist(y_cs$clinical_score[y_cs$line==as.numeric(id_vector[1]) & tolower(y_cs$virus)==tolower(id_vector[2]) & y_cs$tp==id_vector[3]]) if (!is.null(cs) & length(cs)>0) { hm_data$col_annot$clinical_score[i] = cs } else { hm_data$col_annot$clinical_score[i] = NA } } if (all(is.na(hm_data$col_annot$clinical_score))) { hm_data$col_annot = hm_data$col_annot[,!colnames(hm_data$col_annot) %in% c('clinical_score')] } else { hm_data$col_annot$clinical_score = as.factor(hm_data$col_annot$clinical_score) cs_colors = colorRampPalette(brewer.pal(5,'Blues'))(length(levels(hm_data$col_annot$clinical_score))) names(cs_colors) = levels(hm_data$col_annot$clinical_score) hm_data$annot_colors[['clinical_score']] = cs_colors } } else { hm_data$col_annot = NULL } ## Plot heatmap pheatmap(hm_data$mat, cluster_cols=F, cluster_rows=hm_data$cluster_rows, clustering_distance_rows=hm_data$dist_mat, scale='row', gaps_col=hm_data$blocks, labels_col=hm_data$labels_col, labels_row=hm_data$labels_row, color=colorRampPalette(brewer.pal(9,'RdYlBu'))(20), annotation_row=hm_data$row_annot, annotation_col=hm_data$col_annot, annotation_colors=hm_data$annot_colors, fontsize=11, fontsize_col=9, ...) return(NULL) } attr(flow_heatmap_plot, 'help') = " This function plots the data returned by flow_heatmap_data(). Parameters: hm_data: This should be a list returned by flow_heatmap_data(). weights_df: The dataframe containing the weight loss data (default=NULL). clinical_df: The dataframe containing the clinical score data (default=NULL). weight_cols: A character vector containing the columns names of the weight measurements (default=weight_percents). cs_cols: A character vector containing the columns names of the clinical scores (default=cs_columns). annotations: A logical indicating whether annotations should be added to the heatmap (default=TRUE). Returns: NULL Examples: flow_heatmap_plot(heatmap_data, weights, scores) "