--- title: "Abundance models with detection error" author: "Peter Solymos and Subhash Lele" date: "July 16, 2016 — Madison, WI — NACCB Congress" layout: course output: pdf_document course: location: Madison year: 2016 title: "Hierarchical Models Made Easy — July 16, 2016 — Madison, WI — NACCB Congress" lecture: Abundance file: abundance previous: occupancy next: lmm pdf: abundance.pdf --- We can easily generalize this to model abundance surveys. The N-mixture model is the simplest (though unrealistic in practice). ### Assumptions * Replicate surveys, * independence, * closed population. ### Specification of the hierarchical model * True abundance model: $N_i \sim Poisson(\lambda)$ for locations $i=1,2, \ldots, n$. * Observation model: $(Y_{i,t} \mid N_i) \sim Binomial(N_i, p)$ for visits $t=1,2, \ldots, T$. ```{r,dev='png'} set.seed(1234) n <- 200 T <- 1 p <- 0.6 lambda <- 4.2 N <- rpois(n = n, lambda = lambda) Y <- matrix(NA, n, T) for (t in 1:T) { Y[,t] <- rbinom(n = n, size = N, prob = p) } table(N = N, Y = apply(Y, 1, max)) ``` ```{r,dev='png'} library(dclone) library(rjags) model <- custommodel("model { for (i in 1:n) { N[i] ~ dpois(lambda) for (t in 1:T) { Y[i,t] ~ dbin(p, N[i]) } } p ~ dunif(0.001, 0.999) lambda ~ dlnorm(0, 0.001) }") dat <- list(Y = Y, n = n, T = T) ini <- list(N = apply(Y, 1, max) + 1) fit <- jags.fit(data = dat, params = c("p", "lambda"), n.update = 10000, model = model, inits = ini) summary(fit) gelman.diag(fit) plot(fit) pairs(fit) ``` ```{r,dev='png'} ifun <- function(model, n.clones) { dclone(list(N = apply(Y, 1, max) + 1), n.clones) } dcfit <- dc.fit(data = dat, params = c("p", "lambda"), model = model, inits = ini, initsfun = ifun, n.clones = c(1, 2, 4, 8), unchanged = "T", multiply = "n") summary(dcfit) plot(dcfit) dctable(dcfit) plot(dctable(dcfit)) dcdiag(dcfit) plot(dcdiag(dcfit)) pairs(dcfit) ``` As before it is easy to include covariates in the models. There are various extensions and modifications proposed to this basic model. See Lele et al., Solymos et al, Solymos and Lele, Dail and Madsen. (Here we can advertise our work on single survey method and the poster.) 
## Single visit abundance model with covariates

Covariates can enter both the abundance and the detection parts of the model. Here is a single-visit example:

```{r,dev='png'}
set.seed(1234)
n <- 200
x <- rnorm(n)
z <- rnorm(n)
beta <- c(0.9, 0.5)
theta <- c(0.8, -0.5)
Z <- model.matrix(~z)
X <- model.matrix(~x)
p <- plogis(Z %*% theta)
lambda <- exp(X %*% beta)
N <- rpois(n = n, lambda = lambda)
Y <- rbinom(n = n, size = N, prob = p)
table(N = N, Y = Y)
## naive abundance parameter estimates
m <- glm(Y ~ x, family = poisson("log"))
coef(m)
## single-visit abundance model via the detect package
library(detect)
md <- svabu(Y ~ x | z, zeroinfl = FALSE)
coef(md)
```

```{r,dev='png'}
model <- custommodel("model {
    for (i in 1:n) {
        N[i] ~ dpois(lambda[i])
        Y[i] ~ dbin(p[i], N[i])
        log(lambda[i]) <- inprod(X[i,], beta)
        logit(p[i]) <- inprod(Z[i,], theta)
    }
    for (j in 1:px) {
        beta[j] ~ dnorm(naive[j], 0.1)
    }
    for (j in 1:pz) {
        theta[j] ~ dnorm(0, 0.01)
    }
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
    px = ncol(X), pz = ncol(Z), naive = coef(m))
ini <- list(N = Y + 1)
fit <- jags.fit(data = dat, params = c("beta", "theta"), n.update = 5000,
    model = model, inits = ini)
## DC
ifun <- function(model, n.clones) {
    dclone(list(N = Y + 1), n.clones)
}
dcfit <- dc.fit(data = dat, params = c("beta", "theta"),
    model = model, inits = ini, initsfun = ifun,
    n.clones = c(1, 2, 4, 8),
#    n.update = 5000,
    unchanged = c("px", "pz", "naive"), multiply = "n")
summary(dcfit)
dcdiag(dcfit)
```

## Learning with DC

The prior on the coefficients can be updated at each cloning step: the multivariate Normal prior `pr` is replaced with the estimates (mean and inverse covariance) from the previous step via `updatefun`.

```{r,dev='png'}
model <- custommodel("model {
    for (i in 1:n) {
        N[i] ~ dpois(lambda[i])
        Y[i] ~ dbin(p[i], N[i])
        log(lambda[i]) <- inprod(X[i,], beta)
        logit(p[i]) <- inprod(Z[i,], theta)
    }
    cf[1:(px + pz)] ~ dmnorm(pr[,1], pr[,2:(px + pz + 1)])
    beta <- cf[1:px]
    theta <- cf[(px + 1):(px + pz)]
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
    px = ncol(X), pz = ncol(Z),
    pr = unname(cbind(c(coef(m), rep(0, ncol(Z))),
        diag(0.01, ncol(X) + ncol(Z)))))
ini <- list(N = Y + 1)
ifun <- function(model, n.clones) {
    dclone(list(N = Y + 1), n.clones)
}
## function to update the prior,
## defined as a Multivariate Normal distribution
ufun <- function(model, n.clones) {
    cbind(coef(model), solve(vcov(model)))
}
dcfit <- dc.fit(data = dat, params = c("beta", "theta"),
    model = model, inits = ini, initsfun = ifun,
    update = "pr", updatefun = ufun,
    n.clones = c(1, 2, 4, 8),
    n.update = 5000,
    unchanged = c("px", "pz", "pr"), multiply = "n")
summary(dcfit)
dcdiag(dcfit)
```

### Zero-inflated Poisson latent process

Zero inflation really becomes an issue when $T = 1$. With $T > 1$ it is much easier to distinguish unoccupied locations ($O_i = 0$, or $N_i = 0 \mid O_i = 1$), where the whole detection history is 0, from occupied locations with missed detections, where at least part of the detection history is $>0$, provided that $p$ is not too small.
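Before simulating from this model, it helps to write down the zero-inflated specification explicitly; this mirrors the JAGS code in the next chunk:

* Occupancy model: $O_i \sim Bernoulli(q)$ for locations $i=1,2,\ldots,n$.
* True abundance model: $(N_i \mid O_i) \sim Poisson(\lambda O_i)$, so that $N_i = 0$ whenever $O_i = 0$.
* Observation model: $(Y_{i,t} \mid N_i) \sim Binomial(N_i, p)$ for visits $t=1,2,\ldots,T$.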
```{r,dev='png'}
set.seed(1234)
## simulation settings: occupancy probability q introduces the zero inflation
n <- 100
T <- 2
p <- 0.6
lambda <- 3.5
q <- 0.25
O <- rbinom(n, size = 1, prob = q)
N <- O * rpois(n = n, lambda = lambda)
Y <- matrix(NA, n, T)
for (t in 1:T) {
    Y[,t] <- rbinom(n = n, size = N, prob = p)
}
table(N = N, Y = apply(Y, 1, max))
```

```{r,dev='png'}
library(dclone)
model <- custommodel("model {
    for (i in 1:n) {
        O[i] ~ dbern(q)
        N[i] ~ dpois(lambda * O[i])
        for (t in 1:T) {
            Y[i,t] ~ dbin(p, N[i])
        }
    }
    p ~ dunif(0.001, 0.999)
    lambda ~ dlnorm(0, 0.001)
    q ~ dunif(0.001, 0.999)
}")
dat <- list(Y = Y, n = n, T = T)
## initial values are trickier: O and N must be consistent with the observed counts
ini <- list(N = ifelse(rowSums(Y) > 0, 1, 0) * (apply(Y, 1, max) + 1),
    O = ifelse(rowSums(Y) > 0, 1, 0))
fit <- jags.fit(data = dat, params = c("p", "lambda", "q"), n.update = 10000,
    model = model, inits = ini)
summary(fit)
gelman.diag(fit)
plot(fit)
pairs(fit)
```

Data cloning for zero-inflated data: issues might arise with the latent (parent) values, which is why we use conditional likelihood estimation. It is also possible to use data cloning and JAGS for conditional likelihood estimation, as explained in Solymos et al. 2012 ([PDF](https://drive.google.com/open?id=0B-q59n6LIwYPOTNqaWZnSlNYZWc)).

## Poisson-Poisson mixture

This modification can be suitable when there is, for example, double counting of individuals or false positives, so that the observed count $Y_i$ can exceed the true abundance $N_i$.

```{r,dev='png'}
set.seed(1234)
n <- 200
x <- rnorm(n)
z <- rnorm(n)
beta <- c(0.9, 0.5)
theta <- c(0.8, -0.5)
Z <- model.matrix(~z)
X <- model.matrix(~x)
p <- plogis(Z %*% theta)
lambda <- exp(X %*% beta)
N <- rpois(n = n, lambda = lambda)
Y <- rpois(n = n, lambda = p * N)
table(N = N, Y = Y)
## naive abundance parameter estimates
m <- glm(Y ~ x, family = poisson("log"))
## N is of interest for e.g. prediction
model1 <- custommodel("model {
    for (i in 1:n) {
        N[i] ~ dpois(lambda[i])
        #Y[i] ~ dbin(p[i], N[i])
        Y[i] ~ dpois(p[i] * N[i]) # this is the change
        log(lambda[i]) <- inprod(X[i,], beta)
        logit(p[i]) <- inprod(Z[i,], theta)
    }
    for (j in 1:px) {
        beta[j] ~ dnorm(naive[j], 0.1)
    }
    for (j in 1:pz) {
        theta[j] ~ dnorm(0, 0.01)
    }
}")
## more efficient when N is not of interest
model2 <- custommodel("model {
    for (i in 1:n) {
        Y[i] ~ dpois(p[i] * lambda[i])
        log(lambda[i]) <- inprod(X[i,], beta)
        logit(p[i]) <- inprod(Z[i,], theta)
    }
    for (j in 1:px) {
        beta[j] ~ dnorm(naive[j], 0.1)
    }
    for (j in 1:pz) {
        theta[j] ~ dnorm(0, 0.01)
    }
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
    px = ncol(X), pz = ncol(Z), naive = coef(m))
ini <- list(N = Y + 1)
fit1 <- jags.fit(data = dat, params = c("beta", "theta"), n.update = 5000,
    model = model1, inits = ini)
fit2 <- jags.fit(data = dat, params = c("beta", "theta"), n.update = 5000,
    model = model2)
coef(fit1)
coef(fit2)
## DC
ifun <- function(model, n.clones) {
    dclone(list(N = Y + 1), n.clones)
}
dcfit1 <- dc.fit(data = dat, params = c("beta", "theta"),
    model = model1, inits = ini, initsfun = ifun,
    n.clones = c(1, 2, 4, 8),
#    n.update = 5000,
    unchanged = c("px", "pz", "naive"), multiply = "n")
dcfit2 <- dc.fit(data = dat, params = c("beta", "theta"),
    model = model2,
    n.clones = c(1, 2, 4, 8),
#    n.update = 5000,
    unchanged = c("px", "pz", "naive"), multiply = "n")
coef(dcfit1)
coef(dcfit2)
```
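As a final check, the data cloning estimates from the two parameterizations can be placed next to the coefficients used in the simulation. A minimal sketch, reusing `beta`, `theta`, `dcfit1`, and `dcfit2` from the chunk above; the column labels are just for display:

```{r,dev='png'}
## DC (ML) estimates from the latent-N and the marginal parameterizations
## next to the data-generating coefficients
cbind(true = c(beta, theta),
    latent_N = coef(dcfit1),
    marginal = coef(dcfit2))
## data cloning diagnostics for both versions
dcdiag(dcfit1)
dcdiag(dcfit2)
```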