###################################################### ###################################################### # Analyses inferred topology and node heights of # species trees ###################################################### ###################################################### library(ggplot2) library(phytools) library(ape) library("OutbreakTools") library(ggtree) # clear workspace rm(list = ls()) # Set the directory to the directory of the file this.dir <- dirname(parent.frame(2)$ofile) setwd(this.dir) # define the burn-in percentage burn_in = 0.1 # define the prior probability of an indicator being active (here it's lambda/(nr indicators) ) prior = 0.695/50 # Read In Data --------------------------------- mig_rate <- data.frame() counter <- 1 i = 1 # Make the filenames for all possible migration rates print("to fix the error: 'Error in start:end : NA/NaN argument' appears, add an 'End;' to the end of the tree file") trees <- "./../precooked_runs/species_long.trees" # get all the tree topologies in the posterior, if the file isn't ended by END;, this wll cause an error est_species_tree_with_nodes <- read.annotated.nexus(trees) est_species_tree_with_nodes = est_species_tree_with_nodes[seq(-1,-round(length(est_species_tree_with_nodes)*burn_in))] # get all the ossible phylogenies, does however not get the ranked topologies unique_topologies <- unique.multiPhylo(est_species_tree_with_nodes,use.edge.length = FALSE, use.tip.label = TRUE) annotations <- vector(, length(est_species_tree_with_nodes)) # get the node order for each of the species trees for (i in seq(1,length(est_species_tree_with_nodes))){ for (j in seq(1,length(unique_topologies))){ eq <- all.equal(est_species_tree_with_nodes[[i]], unique_topologies[[j]],use.edge.length = FALSE, use.tip.label = TRUE) if (eq){ top_nr <- j break; } } # get the node heights and all subtrees subs = subtrees(est_species_tree_with_nodes[[i]]) nr_tips = length(est_species_tree_with_nodes[[i]]$tip.label) # init the annotations data frame new.annotations <- data.frame(sample=i) # get the root heights of all subtrees node_heights <- rep(0,2*nr_tips-1) node_names <- vector(,2*nr_tips-1) for (j in seq(1,nr_tips)){ node_names[j] <- est_species_tree_with_nodes[[i]]$tip.label[j] } for (j in seq(1,length(subs))){ all_heights <- nodeHeights(subs[[j]]) node_heights[nr_tips+j] <- max(all_heights[,2]) node_names[nr_tips+j] <- paste(sort(subs[[j]]$tip.label), collapse=":") new_name = paste("height&", node_names[nr_tips+j], sep="") new.annotations[,new_name] = max(all_heights[,2]) } # get the correct mapping of labels to node numbers, make it way too complicated tree_edges <- est_species_tree_with_nodes[[i]]$edge node_species <- rep(-1,2*nr_tips-1) node_ne <- rep(-1,2*nr_tips-1) # get the nes for (j in seq(1, length(tree_edges[,2]))){ node_label <- tree_edges[j,2] node_species[node_label] <- est_species_tree_with_nodes[[i]]$annotations[[j]]$species new_name = paste("Ne&", node_names[node_label], sep="") new.annotations[,new_name] = est_species_tree_with_nodes[[i]]$annotations[[j]]$Ne } node_label <- nr_tips+1 new_name = paste("Ne&", node_names[node_label], sep="") new.annotations[,new_name] = est_species_tree_with_nodes[[i]]$root.annotation$Ne # get the migration rates for (j in seq(1, length(tree_edges[,2]))){ node_label <- tree_edges[j,2] for (k in seq(1, length(est_species_tree_with_nodes[[i]]$annotations[[j]]$to))){ target = which(node_species == est_species_tree_with_nodes[[i]]$annotations[[j]]$to[[k]]) new_name = paste("mig&", node_names[node_label], "&", node_names[target], sep="") new.annotations[,new_name] = est_species_tree_with_nodes[[i]]$annotations[[j]]$rates[[k]] } } # get the correct order indices <- sort(node_heights,index.return = TRUE) new.dat <- data.frame(topology=top_nr) new.heights <- data.frame(topology=top_nr) annotations[i] <- list(new.annotations) for (j in seq(1,length(indices$ix))){ new_name = paste("n", j, sep="") new.dat[,new_name] = node_names[[indices$ix[[j]]]] new.heights[,new_name] = node_heights[[indices$ix[[j]]]] } if (i==1){ tree_top = new.dat }else{ tree_top = rbind(tree_top,new.dat) } } # get the unique ranked topologies unique_ranked <- unique(tree_top) median_node_heights <- vector(, length(unique_ranked$topology)) log <- vector(, length(unique_ranked$topology)) # keeps track of the posterior support for tree bpp_support = rep(0, length(unique_ranked$topology)) # for each of these trees, make an individual log file for (i in seq(1, length(unique_ranked$topology))){ # get the row and change the row name to compare rows ur <- unique_ranked[i,] row.names(ur) <- 1 first = TRUE for (j in seq(1, length(tree_top$n1))){ tocomp <- tree_top[j,] row.names(tocomp) <- 1 if (identical(ur,tocomp)){ if (first){ log.data <- annotations[[j]] all_labels = labels(log.data) first <- FALSE }else{ new.dat <- data.frame(sample=j) for (k in seq(1,length(all_labels[[2]]))){ new.dat[1, all_labels[[2]][[k]]] <- annotations[[j]][,all_labels[[2]][[k]]] } log.data <- rbind(log.data,annotations[[j]]) } } } # get the median node heights of the ranked trees (and the upper and lower limits) new.median_node_heights <- vector(, length(node_heights)) all_labels = labels(log.data) nc = 1 for (j in seq(1,length(all_labels[[2]]))){ if (startsWith(all_labels[[2]][[j]],"height&")){ new.median_node_heights[[nc]] <- median(log.data[,all_labels[[2]][[j]]]) nc <- nc+1 } } median_node_heights[[i]] <- list(new.median_node_heights) log[i] <- list(log.data) bpp_support[i] <- length(log.data$sample) fname1 <- gsub("\\.trees", paste("_", i, "tmp.log", sep=""), trees) fname2 <- gsub("\\.trees", paste("_", i, ".log", sep=""), trees) write.table(log.data, file=fname1, quote=F, row.names=F, sep="\t") system(paste("/Applications/BEAST\\ 2.5.0/bin/logcombiner -renumber -burnin 0 -log", fname1, "-o", fname2, sep=" ")) system(paste("rm -r ", fname1)) } sum_sup = sum(bpp_support) bpp__proc_support = rep(0, length(unique_ranked$topology)) for (i in seq(1,length(bpp_support))){ bpp__proc_support[i]= bpp_support[i]/sum_sup } # recompute the branch lengths of each unique ranked phylogeny to be the median branch length for (i in seq(1, length(unique_ranked$topology))){ # find the first tree with the same ranked topology ur <- unique_ranked[i,] row.names(ur) <- 1 for (j in seq(1, length(tree_top$n1))){ tocomp <- tree_top[j,] row.names(tocomp) <- 1 if (identical(ur,tocomp)){ plot_tree <- ladderize(est_species_tree_with_nodes[[j]],right=F) break } } # remove annotations plot_tree$annotations <- NULL plot_tree$root.annotation <- NULL # get the median node heights node_heights <- sort(median_node_heights[[i]][[1]]) ape_node_edges = plot_tree$edge # get the ordering of nodes in the ape tree by comparing the heights ape_node_heights = abs(nodeHeights(plot_tree)-max(nodeHeights(plot_tree)[,2]))[seq(1,length(ape_node_edges[,1]),2),1] # get the corresponding numbers ape_node_numbers = ape_node_edges[seq(1,length(ape_node_edges[,1]),2),1] indices <- order(ape_node_heights) # get the original indices ori_indices <- seq(1,length(indices)) new.node_heights <- node_heights # order the node_heights to match ape noe heights for (j in seq(1, length(indices))){ new.node_heights[indices[j]+length(plot_tree$tip.label)] <- node_heights[j+length(plot_tree$tip.label)] } node_heights <- new.node_heights new.node_heights <- node_heights # now also match the node order for (j in seq(1,length(ape_node_numbers))){ new.node_heights[ape_node_numbers[j]] <- node_heights[j+length(plot_tree$tip.label)] } # change the edge lengths to match the median heights for (j in seq(1, length(plot_tree$edge.length))){ plot_tree$edge.length[j] <- new.node_heights[ape_node_edges[j,1]] - new.node_heights[ape_node_edges[j,2]] } rm(mig_rates) # get all migraiton rates with a bayes factor over ... log.data = log[[i]] all_labels = labels(log.data) nc=1; for (j in seq(1,length(all_labels[[2]]))){ if (startsWith(all_labels[[2]][[j]],"mig&")){ posterior = length(which(log.data[,all_labels[[2]][[j]]]>0))/length(log.data[,all_labels[[2]][[j]]]) bayes = posterior*(1-prior)/((1-posterior)*prior) if (bayes>10){ new.mig_rates <- data.frame(from = strsplit(all_labels[[2]][[j]], "&")[[1]][3], to = strsplit(all_labels[[2]][[j]], "&")[[1]][2], BF=bayes, post=posterior) if (nc==1){ mig_rates = new.mig_rates nc <- nc+1 }else{ mig_rates = rbind(mig_rates, new.mig_rates) } } } } print("fdslkjfdjsklfdjkls") rm(arrows.data) # build the arrow dataframe if (exists("mig_rates")){ for (j in seq(1,length(mig_rates$from))){ from_leaves = strsplit(as.character(mig_rates[j, "from"]), split=":") if (length(from_leaves[[1]])==1){ from_mrca = which(plot_tree$tip.label==from_leaves[[1]][[1]]) }else{ from_mrca = findMRCA(plot_tree, tips=from_leaves[[1]], type=c("node","height")) } to_leaves = strsplit(as.character(mig_rates[j, "to"]), split=":") if (length(to_leaves[[1]])==1){ to_mrca = which(plot_tree$tip.label==to_leaves[[1]][[1]]) }else{ to_mrca = findMRCA(plot_tree, tips=to_leaves[[1]], type=c("node","height")) } # get the heights of from and to nodes and the heights of their parents from_heights = nodeHeights(plot_tree)[which(plot_tree$edge[,2]==from_mrca),] to_heights = nodeHeights(plot_tree)[which(plot_tree$edge[,2]==to_mrca),] x_coord = (min(from_heights[2], to_heights[2])-max(from_heights[1], to_heights[1]))/2+max(from_heights[1], to_heights[1]) new.arrows = data.frame(y=node.height(plot_tree)[from_mrca], yend=node.height(plot_tree)[to_mrca], x=x_coord, xend=x_coord, post=mig_rates[j, "post"]) if (j==1){ arrows.data = new.arrows }else{ arrows.data = rbind(arrows.data, new.arrows) } } } p <- ggtree(plot_tree) + scale_x_reverse() + coord_flip() if (exists("arrows.data")){ p <- p + geom_curve(data=arrows.data,curvature = -0.3, aes(x=x, xend=xend,y = y, yend=yend, size=post/2), arrow = arrow(angle=10,type="closed",ends="last"), alpha=1) } p <- p + geom_tiplab()+ ggtitle(sprintf("posterior support = %.3f", bpp__proc_support[[i]])) fname1 <- gsub("\\.trees", paste("_", i, ".pdf", sep=""), trees) ggsave(plot=p, filename=fname1) }