--- title: "Phylogenetic analysis of coronavirus sequences" author: "Jacques van Helden" date: "`r Sys.Date()`" output: html_document: df_print: paged code_folding: hide fig_caption: yes highlight: zenburn self_contained: no theme: cerulean toc: yes toc_depth: 3 toc_float: yes ioslides_presentation: colortheme: dolphin fig_caption: yes fig_height: 6 fig_width: 7 fonttheme: structurebold highlight: tango smaller: yes toc: yes widescreen: yes beamer_presentation: colortheme: dolphin fig_caption: yes fig_height: 6 fig_width: 7 fonttheme: structurebold highlight: tango incremental: no keep_tex: no slide_level: 2 theme: Montpellier toc: yes word_document: toc: yes toc_depth: '3' slidy_presentation: fig_caption: yes fig_height: 6 fig_width: 7 highlight: tango incremental: no keep_md: yes smaller: yes theme: cerulean toc: yes widescreen: yes pdf_document: fig_caption: yes highlight: zenburn toc: yes toc_depth: 3 font-import: http://fonts.googleapis.com/css?family=Risque font-family: Garamond transition: linear --- ```{r libraries, echo=FALSE, results=FALSE, warning=FALSE, message=FALSE} #### Install required packages #### required.packages <- c("knitr", "ape", "phytools") for (pkg in required.packages) { if (!require(pkg, character.only = TRUE)) { message("Installing package ", pkg) install.packages(pkg, dependencies = TRUE) } require(pkg, character.only = TRUE) } ``` ```{r knitr_settings, include=FALSE, echo=FALSE, eval=TRUE} library(knitr) options(width = 300) knitr::opts_chunk$set( fig.width = 7, fig.height = 5, fig.path = 'figures/phylogeny/CoV_', fig.align = "center", size = "tiny", echo = TRUE, eval = TRUE, warning = FALSE, message = FALSE, results = TRUE, comment = "") # knitr::asis_output("\\footnotesize") ## Store original graphic parameters to restore them after chunks par.ori <- par(no.readonly = TRUE) ``` ```{r parameters} ## Color palette per species speciesPalette <- list( Human = "#880000", Bat = "#888888", Pangolin = "#448800", Camel = "#BB8800", Pig = "#FFBBBB", Civet = "#00BBFF" ) ## Species prefix in the tip labels speciesPrefix <- c("Hu" = "Human", "Bt" = "Bat", "Pn" = "Pangolin", "Cm" = "Camel", "Pi" = "Pig", "Cv" = "Civet") ## Strain-specific colors strainColor <- c( "HuCoV2" = "red", "HuSARS-Fr" = "#0044BB", "PnGu1" = "#00BB00", "BtRaTG13" = "#FF6600", "BtYu-RmYN" = "#FFBB22", "BtZXC21" = "black", "BtZC45" = "black", "BtCambodia/RShSTT200/2010" = "#8822BB", "BtCambodia/RShSTT182/2010" = "#8822BB", "BtRacCS203" = "", ) ## Define feature types features <- c( # "genomes", # "S-gene", "S1", "S2", "RBD", # "Recomb-Xiao", "Recomb-reg-1", "Recomb-reg-2", "Recomb-reg-3", "CDS-ORF1ab", "After-ORF1ab", "CDS-S", "CDS-ORF3a", "CDS-E", "CDS-M", "CDS-ORF6", "CDS-ORF7a", "CDS-ORF8", "CDS-N", "CDS-ORF10") feature <- "S-gene" ## Define collections collections <- c("around-CoV-2", "selected") collection <- "around-CoV-2" # default for testing ## Outgroup per collection outgroups <- list() outgroups[["selected"]] <- c( "HuOC43", "PiPRCV", "HuTGEV", "PiSADS", "Hu229E", "HuNL63") outgroups[["around-CoV-2"]] <- "BtBM48-31" ## Use GISAID data useGISAID <- TRUE if (useGISAID) { collections <- paste0(collections, "-plus-GISAID") for (collection in names(outgroups)) { outgroups[[paste0(collection, "-plus-GISAID")]] <- outgroups[[collection]] } collection <- paste0(collection, "-plus-GISAID") } ``` ```{r directories} dir <- vector() dir["main"] <- ".." dir["results"] <- file.path(dir["main"], "results") dir["genomes"] <- file.path(dir["results"], "genome_phylogeny", "clustalw_alignments") dir["R"] <- file.path(dir["main"], "scripts", "R") # list.files(dir["R"]) source(file.path(dir["R"], "load_tree.R")) source(file.path(dir["R"], "plot_my_tree.R")) ``` ## Phylogeny from full genomes We inferred a phylogeny of virus strains based ontheir full genomes. A multiple alignment of genome sequences was performed with a progressive method (`clustalw`). The tree of virus strains was inferred with a maximum likelihood approach (`phyml` software). ```{r genome_tree, fig.width=12, fig.height=7, out.width="100%", fig.cap="Genome tree of selected coronaviruses. The tree was inferred by maximum likelihood apprroach (PhyML) based on a progressive multiple alignment (clustalw). "} #### Load and plot the genome tree #### genomeTreeFile <- file.path( dir["genomes"], "coronavirus_selected-plus-GISAID_genomes_clustalw_gblocks.phy_phyml_tree.phb") genomeTree <- loadTree( treeFile = genomeTreeFile, outgroup = outgroups[['selected']], rootNode = NULL, speciesPalette = speciesPalette, tipColor = strainColor, nodesToRotate = c(39, 75, 42)) # genomeTree <- paintSubTree(tree = genomeTree, node = 49, state = "CoV2") plotMyTree(genomeTree, main = "Genome-based virus tree", scaleLength = 0.1, show.node.label = FALSE) # nodelabels(cex = 0.4) ## Identify some clades cladelabels(genomeTree$tree, "CoV2", 46, cex = 0.7, orientation = "horizontal", offset = 5) cladelabels(genomeTree$tree, "MERS", 43, cex = 0.7, orientation = "horizontal", offset = 5) cladelabels(genomeTree$tree, "SARS", 69, cex = 0.7, orientation = "horizontal", offset = 5) ``` ## Tree per feature ```{r feature_tree, fig.width=6, fig.height=6, results="asis", out.width="100%", fig.cap="Feature-specific tree. The tree was inferred by maximum likelihood apprroach (PhyML) based on a progressive multiple alignment (clustalw). "} #### Load and plot feature-specific trees #### ## Define vectors to hold the results and enable tree comparisons treeFiles <- vector() treeData <- list() ## Define the path to the tre file feature <- "RBD" for (feature in features) { cat(" \n### ", feature, "\n") message("\n\tReading tree for feature ", feature) prefix <- paste0(feature, "_", collection) treeFile <- file.path( dir["results"], prefix, paste0(prefix, "_clustalw_gblocks.phy_phyml_tree_GTR.phb")) treeFiles[feature] <- treeFile ## Load the tree treeData[[feature]] <- loadTree( treeFile = treeFiles[feature], outgroup = outgroups[[collection]], rootNode = NULL, speciesPalette = speciesPalette, tipColor = strainColor, nodesToRotate = NULL) plotMyTree(treeData[[feature]], main = paste0(feature, " tree"), scaleLength = 0.05, cex = 1, label.offset = 0.01, show.node.label = FALSE) # ## Identify some clades # cladelabels(genomeTree$tree, "CoV2", 46, cex = 0.7, orientation = "horizontal", offset = 5) # cladelabels(genomeTree$tree, "MERS", 43, cex = 0.7, orientation = "horizontal", offset = 5) # cladelabels(genomeTree$tree, "SARS", 69, cex = 0.7, orientation = "horizontal", offset = 5) } ```