## OPERATIONS ON PHYLOGENETIC TREES WITH GGTREE
## For documentation on ggtree and treeio see: 
## https://yulab-smu.top/treedata-book/index.html
## Original code for plotting ML and bootstrap supports: 
## https://www.protocols.io/view/plotting-phylogenetic-tree-with-branch-supports-fr-n9fdh3n?step=1

## To install ggtree / treeio
# if (!requireNamespace('BiocManager', quietly = TRUE))
#   install.packages('BiocManager')
# BiocManager::install(c("ggtree", "treeio"))

library(treeio)
library(ggtree)
library(ape)
library(ggplot2)
library(geiger)

## TREE IMPORT

## Read trees: ML tree and Bayesian tree. This works for MrBayes output built
## with conformat=simple. If you used default conformat=FigTree in MrBayes block,
## import like specified the detour below.
bootTree <- treeio::read.newick ('data/tree_raxml.nwk')
bayesTree <- ape::read.nexus ('data/tree_mrbayes.nex')

## This Bayesian tree file includes 2 trees: first with  posterior supports, and
## second without them. We will operate on the first.
bayesTree <- bayesTree[[1]]

######## DETOUR TO TRANSFORM DEFAULT MRBAYES FORMATTING

## For downstream plotting we must have posteriors as node labels. Default
## MrBayes output (conformat=FigTree) contains posteriors in comments that, when
## imported, go to 'data' part of treedata object, and not in 'phylo$node.label'
## where we need them. It can be fixed as follows.

# bayesTree <- read.mrbayes ('data/tree_mrbayes_figtree.nex')

## Sort nodes in data to match node order in phylo. (Below is MrBayes
## version, but with few adjustments it also works for BEAST)
# bayesTree@data <- bayesTree@data[order(as.numeric(bayesTree@data$node)), ]

## Write posteriors as node labels. 
## If tree is already rooted, do not append '' to vector. 
# bayesTree@phylo$node.label <-
#   append ('', as.vector(subset(
#     bayesTree@data$prob,
#     as.numeric(bayesTree@data$node) > length(bayesTree@phylo$tip.label)
#   )))

## Translate treedata into phylo.
# bayesTree <- as.phylo(bayesTree)

######## END OF DETOUR

## Visual check of the trees.
ggtree(bootTree) +
  geom_tiplab() +
  geom_text2(
    aes(label = label, subset = !isTip),
    hjust = 1.3,
    vjust = -0.4,
    size = 2
  ) +
  xlim(0, 0.3)

ggtree(bayesTree) +
  geom_tiplab() +
  geom_text2(
    aes(label = round(as.numeric(label), 2), subset = !isTip),
    hjust = 1.3,
    vjust = -0.4,
    size = 2
  ) +
  xlim(0, 0.3)
## Note: Warnings about NAs introduced by coercion can be safely ignored.

## REROOTING

## For the next step we need to root both trees. To root the tree, you need to
## know either 1) labels of outgroup taxa, or 2) an ID of a node joining all the
## taxa belonging to the outgroup.

## First rerooting option, by specifying outgroup labels.
bayesTree_rooted <-
  ape::root(bayesTree,
            outgroup = c('MM35985', 'MYX328'),
            edgelabel = TRUE)
## note: edgelabel = T is essential for correct label placement, otherwise
## labels can slip to wrong branches. For details see Czech et al. 2017 "A
## Critical Review on the Use of Support Values [...]"
## https://doi.org/10.1093/molbev/msx055

## Same for ML tree
bootTree_rooted <-
  ape::root(bootTree,
            outgroup = c('MM35985', 'MYX328'),
            edgelabel = TRUE)

## If outgroup is large, the option with node ID may be preferable. The easiest
## way to locate needed ID is to plot all of them (on example of bayesian tree).
# ggtree(bootTree) +
#   geom_tiplab() +
#   geom_text(
#     aes(label = node),
#     hjust = 1.3,
#     vjust = 1.3,
#     size = 2,
#     color = 'red'
#   ) +
#   xlim(0, 0.3)

## If your tree is large and crowded, it may be difficult to spot the desired
## node. Another way is to make a table from the tree, find a taxon from the
## outgroup and trace node-to-node connections down to the common node of all
## outgroup taxa. 
# bayesTree_tib <- as_tibble(bayesTree)

## In case of Bayesian tree it's node 89.
# bayesTree_rooted <- ape::root(bayesTree, node = 89, edgelabel = TRUE)

## check the result
ggtree(bootTree_rooted) +
  geom_tiplab() +
  geom_text2(
    aes(label = round(as.numeric(label), 2), subset = !isTip),
    hjust = 1.3,
    vjust = -0.4,
    size = 2
  ) +
  xlim(0, 0.3)

ggtree(bayesTree_rooted) +
  geom_tiplab() +
  geom_text2(
    aes(label = round(as.numeric(label), 2), subset = !isTip),
    hjust = 1.3,
    vjust = -0.4,
    size = 2
  ) +
  xlim(0, 0.3)

## COMBINE BOOTSTRAPS AND POSTERIORS

## The getSupports function compares all the subclades in the Bayes tree
## to all the subclades in the bootstrap tree, and vice versa, and identifies
## all those clades that are identical. It produces a data frame supportsTable
## with node IDs and their respective bootstraps for further use in ggtree.
## Fetch getSupports from GitHub:
source('https://github.com/Mycology-Microbiology-Center/Phylo2021/blob/main/scripts/getSupports.R?raw=TRUE')

## Apply function to our trees and proceed to tree annotation.
supportsTable <- getSupports(primaryTree = bayesTree_rooted, secondaryTree = bootTree_rooted)

## RELABELING

## Read substitution table for tip labels. The first column must contain the
## same values as tips in the tree, the other column(s) - your pretty labels.
labels <- read.csv('data/labels_simple.csv')

## Add bootstrap values obtained with the getSupports function above.
tree_data <- ggtree(bayesTree_rooted) %<+% supportsTable
## Note: in the same way, the table can contain any other data you may want to
## plot, e.g. values of morphological traits. %<+% is a ggtree-specific operator, 
## basically left join.

## Combine tree and new labels.
tree_data <- tree_data %<+% labels

## Write the result.
tree_data_lab <-
tree_data +
  geom_tiplab(aes(label = label_pretty), offset = 0.001) +
  geom_label2(
    aes(
      label = paste(
        ifelse(is.na(round(as.numeric(label), 2)),
        '-', round(as.numeric(label), 2)),
        ifelse(is.na(support),
               '-', support),
        sep = '/'
      ),
      subset = !isTip & label != 'Root'
    ),
    hjust = 1.1,
    vjust = -0.3,
    alpha = 0.8,
    label.size = NA,
    label.padding = unit(0.2, 'mm'),
    size = 2
  ) +
  xlim(0, 0.5)
## Note: xlim is needed to fix truncation of long labels. You may need to change
## the second parameter or even drop xlim altogether, depending on your tree.

## Check the result.
tree_data_lab

## You can change some default aesthetics of the tree post mortem.
## Change all edges from round to miter.
tree_data_lab[["layers"]][[1]][["geom_params"]][["lineend"]] <- 'square'
tree_data_lab[["layers"]][[2]][["geom_params"]][["lineend"]] <- 'square'
## Change thickness of tree lines.
tree_data_lab[["layers"]][[1]][["geom"]][["default_aes"]][["size"]] <- 0.7

## Check the result.
tree_data_lab

## Save the result.
dir.create('output')
ggsave(
  filename = 'output/tree_data_lab.pdf',
  device = 'pdf',
  width = 210,
  height = 297,
  units = 'mm'
)
## Note: This saves the last plotted graph. If you need to save a particular
## object, specify plot = object_name

## BONUS RENAMING

## Read table with more information for labels.
labels_extended <- read.csv('data/labels_extended.csv', na.strings = '')

## Add more data.
tree_data <- tree_data %<+% labels_extended

## Plot the tree with ridiculously detailed labels.
tree_data +
  geom_tiplab(
    aes(label = paste(
      label,
      ifelse(is.na(Organism),
             '', paste(',', Organism)),
      ifelse(is.na(Country),
             '', paste(',', Country)),
      ifelse(is.na(Altitude),
             '', paste(', altitude:', Altitude, 'm')),
      sep = ''
    )),
    geom = 'label',
    label.size = NA,
    label.padding = unit(0, 'mm'),
    offset = 0.001
  ) +
  geom_label2(
    aes(
      label = paste(
        ifelse(is.na(round(as.numeric(label), 2)),
        '-', round(as.numeric(label), 2)),
        ifelse(is.na(support),
               '-', support),
        sep = '/'
      ),
      subset = !isTip & label != 'Root'
    ),
    hjust = 1.1,
    vjust = -0.3,
    alpha = 0.8,
    label.size = NA,
    label.padding = unit(0.2, 'mm'),
    size = 2
  ) +
  xlim(0, 0.5)

## Save the result.
ggsave(
  filename = 'output/tree_data_lab_ext.pdf',
  device = 'pdf',
  width = 420,
  height = 297,
  units = 'mm'
)

## BONUS HIGHLIGHTING CLADES

## For this purpose ggtree provides several geoms, e.g. geom_hilight (yep),
## geom_balance (see again https://yulab-smu.top/treedata-book/chapter5.html).
## With geom_cladelabel we additionally can mark clades with labeled vertical
## bars. We will highlight 2 clades and add 1 labeled bar by adding these
## geometries.

tree_data +
  geom_label2(
    aes(
      label = paste(
        ifelse(is.na(round(as.numeric(label), 2)),
        '-', round(as.numeric(label), 2)),
        ifelse(is.na(support),
               '-', support),
        sep = '/'
      ),
      subset = !isTip & label != 'Root'
    ),
    hjust = 1.1,
    vjust = -0.3,
    alpha = 0.8,
    label.size = NA,
    label.padding = unit(0.2, 'mm'),
    size = 2
  ) +
  geom_hilight(
    node = 65,
    fill = "#1B9E77",
    alpha = 0.3,
    extend = 0.22
  ) +
  geom_hilight(
    node = 56,
    fill = "#F54748",
    alpha = 0.3,
    extend = 0.20
  ) +
  geom_cladelabel(
    56,
    "SP. NOV.",
    offset = 0.12,
    barsize = 2,
    align = TRUE,
    angle = 90,
    offset.text = 0.008,
    extend = 0.5,
    hjust = 0.5,
    fontsize = 5
  ) +
  geom_tiplab(aes(label = label_pretty), offset = 0.001) +
  xlim(0, 0.5)

## Save the result.
ggsave(
  filename = 'output/tree_data_highlighted.pdf',
  device = 'pdf',
  width = 210,
  height = 297,
  units = 'mm'
)