--- title: "Abundance models with detection error" author: "Peter Solymos and Subhash Lele" date: "July 16, 2016 — Madison, WI — NACCB Congress" layout: course output: pdf_document course: location: Madison year: 2016 title: "Hierarchical Models Made Easy — July 16, 2016 — Madison, WI — NACCB Congress" lecture: Abundance file: abundance previous: occupancy next: lmm pdf: abundance.pdf --- We can easily generalize this to model abundance surveys. The N-mixture model is the simplest (though unrealistic in practice). ### Assumptions * Replicate surveys, * independence, * closed population. ### Specification of the hierarchical model * True abundance model: $N_i \sim Poisson(\lambda)$ for locations $i=1,2, \ldots, n$. * Observation model: $(Y_{i,t} \mid N_i) \sim Binomial(N_i, p)$ for visits $t=1,2, \ldots, T$. ```{r,dev='png'} set.seed(1234) n <- 200 T <- 1 p <- 0.6 lambda <- 4.2 N <- rpois(n = n, lambda = lambda) Y <- matrix(NA, n, T) for (t in 1:T) { Y[,t] <- rbinom(n = n, size = N, prob = p) } table(N = N, Y = apply(Y, 1, max)) ``` ```{r,dev='png'} library(dclone) library(rjags) model <- custommodel("model { for (i in 1:n) { N[i] ~ dpois(lambda) for (t in 1:T) { Y[i,t] ~ dbin(p, N[i]) } } p ~ dunif(0.001, 0.999) lambda ~ dlnorm(0, 0.001) }") dat <- list(Y = Y, n = n, T = T) ini <- list(N = apply(Y, 1, max) + 1) fit <- jags.fit(data = dat, params = c("p", "lambda"), n.update = 10000, model = model, inits = ini) summary(fit) gelman.diag(fit) plot(fit) pairs(fit) ``` ```{r,dev='png'} ifun <- function(model, n.clones) { dclone(list(N = apply(Y, 1, max) + 1), n.clones) } dcfit <- dc.fit(data = dat, params = c("p", "lambda"), model = model, inits = ini, initsfun = ifun, n.clones = c(1, 2, 4, 8), unchanged = "T", multiply = "n") summary(dcfit) plot(dcfit) dctable(dcfit) plot(dctable(dcfit)) dcdiag(dcfit) plot(dcdiag(dcfit)) pairs(dcfit) ``` As before it is easy to include covariates in the models. There are various extensions and modifications proposed to this basic model. See Lele et al., Solymos et al, Solymos and Lele, Dail and Madsen. (Here we can advertise our work on single survey method and the poster.) 
## Single visit abundance model with covariates

Covariates can enter both the abundance and the detection parts of the model. Here is a single-visit example:

```{r,dev='png'}
set.seed(1234)
n <- 200
x <- rnorm(n)
z <- rnorm(n)
beta <- c(0.9, 0.5)
theta <- c(0.8, -0.5)
Z <- model.matrix(~z)
X <- model.matrix(~x)
p <- plogis(Z %*% theta)
lambda <- exp(X %*% beta)
N <- rpois(n = n, lambda = lambda)
Y <- rbinom(n = n, size = N, prob = p)
table(N = N, Y = Y)
## naive abundance parameter estimates
m <- glm(Y ~ x, family = poisson("log"))
coef(m)
## single-visit abundance model via the detect package
library(detect)
md <- svabu(Y ~ x | z, zeroinfl = FALSE)
coef(md)
```

```{r,dev='png'}
model <- custommodel("model {
    for (i in 1:n) {
        N[i] ~ dpois(lambda[i])
        Y[i] ~ dbin(p[i], N[i])
        log(lambda[i]) <- inprod(X[i,], beta)
        logit(p[i]) <- inprod(Z[i,], theta)
    }
    for (j in 1:px) {
        beta[j] ~ dnorm(naive[j], 0.1)
    }
    for (j in 1:pz) {
        theta[j] ~ dnorm(0, 0.01)
    }
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
    px = ncol(X), pz = ncol(Z), naive = coef(m))
ini <- list(N = Y + 1)
fit <- jags.fit(data = dat, params = c("beta", "theta"), n.update = 5000,
    model = model, inits = ini)
## DC
ifun <- function(model, n.clones) {
    dclone(list(N = Y + 1), n.clones)
}
dcfit <- dc.fit(data = dat, params = c("beta", "theta"),
    model = model, inits = ini, initsfun = ifun,
    n.clones = c(1, 2, 4, 8),
#    n.update = 5000,
    unchanged = c("px", "pz", "naive"), multiply = "n")
summary(dcfit)
dcdiag(dcfit)
```

## Learning with DC

The prior on the coefficients can be updated at each cloning step: the multivariate Normal prior `pr` is replaced with the estimates (mean and inverse covariance) from the previous step via `updatefun`.

```{r,dev='png'}
model <- custommodel("model {
    for (i in 1:n) {
        N[i] ~ dpois(lambda[i])
        Y[i] ~ dbin(p[i], N[i])
        log(lambda[i]) <- inprod(X[i,], beta)
        logit(p[i]) <- inprod(Z[i,], theta)
    }
    cf[1:(px + pz)] ~ dmnorm(pr[,1], pr[,2:(px + pz + 1)])
    beta <- cf[1:px]
    theta <- cf[(px + 1):(px + pz)]
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
    px = ncol(X), pz = ncol(Z),
    pr = unname(cbind(c(coef(m), rep(0, ncol(Z))),
        diag(0.01, ncol(X) + ncol(Z)))))
ini <- list(N = Y + 1)
ifun <- function(model, n.clones) {
    dclone(list(N = Y + 1), n.clones)
}
## function to update the prior,
## defined as a Multivariate Normal distribution
ufun <- function(model, n.clones) {
    cbind(coef(model), solve(vcov(model)))
}
dcfit <- dc.fit(data = dat, params = c("beta", "theta"),
    model = model, inits = ini, initsfun = ifun,
    update = "pr", updatefun = ufun,
    n.clones = c(1, 2, 4, 8),
    n.update = 5000,
    unchanged = c("px", "pz", "pr"), multiply = "n")
summary(dcfit)
dcdiag(dcfit)
```

### Zero-inflated Poisson latent process

Zero inflation really becomes an issue when $T = 1$. With $T > 1$ it is much easier to distinguish unoccupied locations ($O_i = 0$, or $N_i = 0 \mid O_i = 1$), where the whole detection history is 0, from occupied locations with missed detections, where at least part of the detection history is $>0$, provided that $p$ is not too small.
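Before simulating from this model, it helps to write down the zero-inflated specification explicitly; this mirrors the JAGS code in the next chunk:

* Occupancy model: $O_i \sim Bernoulli(q)$ for locations $i=1,2,\ldots,n$.
* True abundance model: $(N_i \mid O_i) \sim Poisson(\lambda O_i)$, so that $N_i = 0$ whenever $O_i = 0$.
* Observation model: $(Y_{i,t} \mid N_i) \sim Binomial(N_i, p)$ for visits $t=1,2,\ldots,T$.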
```{r,dev='png'}
set.seed(1234)
## simulation settings: occupancy probability q introduces the zero inflation
n <- 100
T <- 2
p <- 0.6
lambda <- 3.5
q <- 0.25
O <- rbinom(n, size = 1, prob = q)
N <- O * rpois(n = n, lambda = lambda)
Y <- matrix(NA, n, T)
for (t in 1:T) {
    Y[,t] <- rbinom(n = n, size = N, prob = p)
}
table(N = N, Y = apply(Y, 1, max))
```

```{r,dev='png'}
library(dclone)
model <- custommodel("model {
    for (i in 1:n) {
        O[i] ~ dbern(q)
        N[i] ~ dpois(lambda * O[i])
        for (t in 1:T) {
            Y[i,t] ~ dbin(p, N[i])
        }
    }
    p ~ dunif(0.001, 0.999)
    lambda ~ dlnorm(0, 0.001)
    q ~ dunif(0.001, 0.999)
}")
dat <- list(Y = Y, n = n, T = T)
## initial values are trickier: O and N must be consistent with the observed counts
ini <- list(N = ifelse(rowSums(Y) > 0, 1, 0) * (apply(Y, 1, max) + 1),
    O = ifelse(rowSums(Y) > 0, 1, 0))
fit <- jags.fit(data = dat, params = c("p", "lambda", "q"), n.update = 10000,
    model = model, inits = ini)
summary(fit)
gelman.diag(fit)
plot(fit)
pairs(fit)
```

Data cloning for zero-inflated data: issues might arise with the latent (parent) values, which is why we use conditional likelihood estimation. It is also possible to use data cloning and JAGS for conditional likelihood estimation, as explained in Solymos et al. 2012 ([PDF](https://drive.google.com/open?id=0B-q59n6LIwYPOTNqaWZnSlNYZWc)).

## Poisson-Poisson mixture

This modification can be suitable when there is, for example, double counting of individuals or false positives, so that the observed count $Y_i$ can exceed the true abundance $N_i$.

```{r,dev='png'}
set.seed(1234)
n <- 200
x <- rnorm(n)
z <- rnorm(n)
beta <- c(0.9, 0.5)
theta <- c(0.8, -0.5)
Z <- model.matrix(~z)
X <- model.matrix(~x)
p <- plogis(Z %*% theta)
lambda <- exp(X %*% beta)
N <- rpois(n = n, lambda = lambda)
Y <- rpois(n = n, lambda = p * N)
table(N = N, Y = Y)
## naive abundance parameter estimates
m <- glm(Y ~ x, family = poisson("log"))
## N is of interest for e.g. prediction
model1 <- custommodel("model {
    for (i in 1:n) {
        N[i] ~ dpois(lambda[i])
        #Y[i] ~ dbin(p[i], N[i])
        Y[i] ~ dpois(p[i] * N[i]) # this is the change
        log(lambda[i]) <- inprod(X[i,], beta)
        logit(p[i]) <- inprod(Z[i,], theta)
    }
    for (j in 1:px) {
        beta[j] ~ dnorm(naive[j], 0.1)
    }
    for (j in 1:pz) {
        theta[j] ~ dnorm(0, 0.01)
    }
}")
## more efficient when N is not of interest
model2 <- custommodel("model {
    for (i in 1:n) {
        Y[i] ~ dpois(p[i] * lambda[i])
        log(lambda[i]) <- inprod(X[i,], beta)
        logit(p[i]) <- inprod(Z[i,], theta)
    }
    for (j in 1:px) {
        beta[j] ~ dnorm(naive[j], 0.1)
    }
    for (j in 1:pz) {
        theta[j] ~ dnorm(0, 0.01)
    }
}")
dat <- list(Y = Y, n = n, X = X, Z = Z,
    px = ncol(X), pz = ncol(Z), naive = coef(m))
ini <- list(N = Y + 1)
fit1 <- jags.fit(data = dat, params = c("beta", "theta"), n.update = 5000,
    model = model1, inits = ini)
fit2 <- jags.fit(data = dat, params = c("beta", "theta"), n.update = 5000,
    model = model2)
coef(fit1)
coef(fit2)
## DC
ifun <- function(model, n.clones) {
    dclone(list(N = Y + 1), n.clones)
}
dcfit1 <- dc.fit(data = dat, params = c("beta", "theta"),
    model = model1, inits = ini, initsfun = ifun,
    n.clones = c(1, 2, 4, 8),
#    n.update = 5000,
    unchanged = c("px", "pz", "naive"), multiply = "n")
dcfit2 <- dc.fit(data = dat, params = c("beta", "theta"),
    model = model2,
    n.clones = c(1, 2, 4, 8),
#    n.update = 5000,
    unchanged = c("px", "pz", "naive"), multiply = "n")
coef(dcfit1)
coef(dcfit2)
```
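As a final check, the data cloning estimates from the two parameterizations can be placed next to the coefficients used in the simulation. A minimal sketch, reusing `beta`, `theta`, `dcfit1`, and `dcfit2` from the chunk above; the column labels are just for display:

```{r,dev='png'}
## DC (ML) estimates from the latent-N and the marginal parameterizations
## next to the data-generating coefficients
cbind(true = c(beta, theta),
    latent_N = coef(dcfit1),
    marginal = coef(dcfit2))
## data cloning diagnostics for both versions
dcdiag(dcfit1)
dcdiag(dcfit2)
```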