---
title: "MeasurementError"
author: "Brian C. O’Meara"
date: "6/13/2019"
output:
  html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, cache=TRUE)
library(OUwie)
library(plyr)
library(parallel)
library(knitr)
library(ggplot2)
```

## Measurement error

Let's do some investigations to see how measurement error could matter.

First set up the model parameters

```{r parameters, include=TRUE, echo=TRUE}
data(sim.ex, package="OUwie")
alpha=rep(2,2)
sigma.sq=rep(.45, 2)
theta0=10
theta=rep(10,2)
```

Next, make up the data

```{r simdata, include=TRUE, echo=TRUE}
sim.data<-OUwie::OUwie.sim(tree, trait, simmap.tree=FALSE,
  scaleHeight=FALSE, alpha=alpha, sigma.sq=sigma.sq,
  theta0=theta0, theta=theta)
sim.data$mserr <- 0 #add a measurement error column
```

Look to see if it's ok. We could just `print()` it, but `knitr::kable()` makes it prettier.

```{r datacheck, include=TRUE, echo=TRUE}
knitr::kable(sim.data)
```

Now, let's think about a range of combinations we could try.

```{r combos, include=TRUE, echo=TRUE}
mserr.vector <- seq(from=0, to=.25*max(sim.data$X), length.out=10)
models.vector <- c("BM1", "OU1")
mserr.argument.vector <- c("none", "known")
all.analysis.combinations <- expand.grid(model=models.vector, mserr = mserr.argument.vector, stringsAsFactors = FALSE)
knitr::kable(all.analysis.combinations)
```

We're going to make some functions to make running this all easier. But you won't see this in the final rendered document.

```{r functions, include=FALSE, echo=FALSE}

#' Do a single OUwie run for a model and summarize the results
#' @param model Model name, as for OUwie
#' @param mserr "known" or "none" as for OUwie
#' @param new.data OUwie-formatted data.frame
#' @param phy Phylo object with node labels
#' @return Data.frame summarizing this run
ouwie.single.run <- function(model, mserr, new.data, phy) {
  result <- OUwie(phy, new.data, model=model, mserr=mserr)
  result.df <- data.frame(mserr=mserr, alpha=result$solution['alpha',1], sigma.sq=result$solution['sigma.sq',1], theta=result$theta[1,1] , AICc=result$AICc, model=model, stringsAsFactors = FALSE)
  return(result.df)
}

#' Compute everything for a given mserr.value. Note we use the same dataset for each combo
#' @param mserr.value The value of measurement error
#' @param all.combos The data.frame of all analysis combinations
#' @param new.data The simulated data
#' @param phy Phylo object with node labels
#' @return A data.frame of results for each element in the combinations
compute.w.error <- function(mserr.value, all.combos = all.analysis.combinations, new.data = sim.data, phy=tree) {
  new.data$X <- rnorm(length(new.data$X), mean=new.data$X, sd=mserr.value)
  new.data$mserr <- mserr.value
  all.result.df <- data.frame()
  for (i in sequence(nrow(all.combos))) {
    # Note that this is slow: it has to keep adding to the object in memory. Look at plyr or dplyr for faster ways to do this
    all.result.df <- rbind(all.result.df, ouwie.single.run(model=all.combos$model[i], mserr=all.combos$mserr[i],  new.data=new.data, phy=phy))
  }
  rownames(all.result.df)<-NULL
  all.result.df$mserr.value = mserr.value
  all.result.df$delta.AICc = NA
  all.result.df$delta.AICc[which(all.result.df$mserr=="none")] <- all.result.df$AICc[which(all.result.df$mserr=="none")] - min(all.result.df$AICc[which(all.result.df$mserr=="none")])
  all.result.df$delta.AICc[which(all.result.df$mserr=="known")] <- all.result.df$AICc[which(all.result.df$mserr=="known")] - min(all.result.df$AICc[which(all.result.df$mserr=="known")]) # note the danger here: what if I'd forgotten to change a none to a known? A two element loop would probably be safer
  return(all.result.df)
}
```

Now, we do a bunch of runs.


```{r do_the_analysis, include=TRUE, echo=TRUE}
all.results <- plyr::rbind.fill(parallel::mclapply(mserr.vector, 
compute.w.error, all.combos = all.analysis.combinations, new.data = sim.data, phy=tree))

all.results$mserr.fraction <- all.results$mserr.value / max(sim.data$X)
all.results$model_type <- paste(all.results$model, all.results$mserr, sep="_")
```

## Plot the results

### AIC 

```{r plot1, include=TRUE, echo=FALSE}
ggplot2::ggplot(data=all.results, aes(x=mserr.value, y=delta.AICc, colour=model_type)) + ggplot2::geom_line(alpha=0.9) + ggplot2::scale_color_viridis_d(end=0.8)
```

## Sigma-squared

```{r sigmaplot, include=TRUE, echo=FALSE}
ggplot2::ggplot(data=all.results, aes(x=mserr.value, y=sigma.sq, colour=model_type)) + ggplot2::geom_line() + ggplot2::scale_color_viridis_d(end=0.8) + ggplot2::scale_y_log10() + ggplot2::geom_hline(yintercept=sigma.sq[1], linetype="dotted", color="black")
```

## Alpha

```{r alphaplot, include=TRUE, echo=FALSE}
alpha_data <- subset(all.results, model=="OU1")
ggplot2::ggplot(data=alpha_data, aes(x=mserr.value, y=alpha, colour=model_type)) + ggplot2::geom_line(alpha=0.5) + ggplot2::scale_color_viridis_d(end=0.8)
```