## ## Fitting models to trait data ## Here we are going to do some simple examples of fitting models of continuous characte evolution to comparative data ## We are going to use the `geiger` package for this ## ## install.packages("geiger") library(geiger) packageVersion("geiger") ## In case geiger's package version is less than 1.9 library(devtools) install_github("mwpennell/geiger-v2") ## rm(list=ls()) ## Load the Cyprinodon tree and dataset cyp_phy <- read.tree("datasets/cyprinodon.tre") ## Tip: Setting `row.names=1` assigns the rownames to be equal to the first column (in this case the species names) cyp_dat <- read.csv("datasets/cyprinodon.csv", row.names=1) head(cyp_dat) ## For comparative analyses, we need to match the tree and the data. If data is in the form of a vector, names must match the tip labels of the tree. If data is a matrix or data.frame, then rownames need to match the tree. **N.B.:** geiger automatically checks and matches the names of the data and the tree but not all packages/functions do this. It is good practice to **always** do this manually to avoid non-sensical results ## Use geiger's `treedata()` function for this cyp_td <- treedata(cyp_phy, cyp_dat) str(cyp_td) dummy_tree <- cyp_phy head(dummy_tree$tip.label) dummy_tree_drop <- drop.tip(dummy_tree, tip="Cyprinodon_alvarezi") dummy_tree_drop ## These match. But here is what happens when this is not the case data(geospiza) geo_phy <- geospiza$phy geo_dat <- geospiza$dat geo_td <- treedata(geo_phy, geo_dat) ## To do this "by hand" geo_phy <- geospiza$phy geo_dat <- geospiza$dat to_drop <- geo_phy$tip.label[!geo_phy$tip.label %in% rownames(geo_dat)] ## Drop the tips geo_phy_pr <- drop.tip(geo_phy, tip=to_drop) geo_phy_pr ## Trim down the dataset geo_dat_pr <- geo_dat[geo_phy_pr$tip.label,] ## Select all columns geo_dat_pr ## ## Fitting models of trait evolution ## Use "jaw.length" as the trait of interest. Use the `treedata` object because we know that the tips are already matched. states <- cyp_td$data[,"jaw.length"] tree <- cyp_td$phy states <- states[tree$tip.label] ## Brownian motion fit_bm <- fitContinuous(tree, states, model="BM") ## Examine the model object fit_bm fit_bm$opt fit_bm$opt$sigsq fit_bm$opt$aic ## Ornstein-Uhlenbeck fit_ou <- fitContinuous(tree, states, model="OU") ## Early burst fit_eb <- fitContinuous(tree, states, model="EB") ## Compare the models using AIC aic_bm <- fit_bm$opt$aic aic_ou <- fit_ou$opt$aic aic_eb <- fit_eb$opt$aic ## Get the AIC weights ## Create a named vector aic_all <- c(aic_bm, aic_ou, aic_eb) names(aic_all) <- c("BM", "OU", "EB") aicw(aic_all) ## Almost all the AICw is on OU ## Look at the parameter values of the OU model fit_ou$opt ## Calculate the phylogenetic half-life -- how long does it take for half the infromation in the phylogeny to be erased phy_halflife <- log(2)/fit_ou$opt$alpha phy_halflife ## Compare this to total tree depth phy_halflife / max(branching.times(tree)) ## In one quarter of the tree height, half the information is lost ## ## Estimating phylogenetic signal (Pagel's lambda) fit_lambda <- fitContinuous(tree, states, model="lambda") ## The estimate will be between 0 and 1 fit_lambda$opt$lambda ## ## Incorporating error ## Simulate some data sim_phy <- sim.bdtree(b=1, d=0, stop="taxa",n=1000, seed=2) sim_dat <- sim.char(sim_phy, par=1, model="BM")[,,] ## Fit BM sim_fit <- fitContinuous(sim_phy, sim_dat, SE=0, model="BM") sim_fit$opt$sigsq ## Now add error to the data sim_dat_error <- sim_dat + rnorm(1000, 0, sd=0.1) sim_fit_error <- fitContinuous(sim_phy, sim_dat_error, SE=0, model="BM") sim_fit_error$opt$sigsq sim_fit_wse <- fitContinuous(sim_phy, sim_dat_error, SE=0.1, model="BM") sim_fit_wse$opt$sigsq ## ## Discrete Characters ## Simulate characters on Cyprinodon tree because we don't have discrete data available for this group ## Make a Q matrix -- same as for molecular characters Q <- matrix(c(-0.3, 0.3, 0.1, -0.1), nrow=2, byrow = TRUE) Q <- matrix(c(-0.3, 0.1, 0.3, -0.1), nrow=2) states_dis <- sim.char(tree, par=Q, model="discrete")[,,] ## ### Fit discrete character models ## Assume forward and backwards rates are equal ("ER" model) fit_sym <- fitDiscrete(tree, states_dis, model="ER") ## Allow the forward and backward rates to be different ("ARD" model) fit_dif <- fitDiscrete(tree, states_dis, model="ARD") ## Compare the models aic_sym <- fit_sym$opt$aic aic_dif <- fit_dif$opt$aic aic_dis <- c(aic_sym, aic_dif) aic_dis <- c(fit_sym$opt$aic, fit_dif$opt$aic) names(aic_dis) <- c("ER", "ARD") aicw(aic_dis) ## ## Exercise ## Read in the Anolis data and fit and compare models of trait evolution anol_phy <- read.tree("datasets/anolis.tre") anol_dat <- read.csv("datasets/anolis.csv", row.names=1)