--- title: "Analysing data with spatial dependence" author: "Peter Solymos and Subhash Lele" date: "July 16, 2016 — Madison, WI — NACCB Congress" layout: course output: pdf_document course: location: Madison year: 2016 title: "Hierarchical Models Made Easy — July 16, 2016 — Madison, WI — NACCB Congress" lecture: Spatial file: spatial previous: pva pdf: spatial.pdf --- ## Kriging example Exponential decay is used ($e^{-\lambda D}$). Try different values for $\lambda$ (0, 0.1, 0.5). Try modifying it to half-Normal ($e^{-(\lambda D)^{2}}$). The models is defined as: $$Y \sim MVN(\mathbf{\mu}, \mathbf{\Sigma})$$ where $\mathbf{\mu}$ is the mean vector for the Multivariate Normal (MVN) distribution, and $\mathbf{\Sigma}$ is the variance-covariance matrix with a spatial dependence structure. $\mathbf{\Sigma}$ is defined as $\sigma^{2} exp(-\lambda \mathbf{D})$, where $\mathbf{D}$ is an $n$-by-$n$ spatial distance matrix (we use Euclidean distances). ```{r} set.seed(2345) library(MASS) mu <- 5 # global mean sigma_sq <- 1 # global variance lambda <- 0.5 ## set up an m x m square lattice m <- 10 xy <- expand.grid(x=seq_len(m), y=seq_len(m)) n <- nrow(xy) D <- as.matrix(dist(xy)) Sigma <- sigma_sq * exp(-lambda*D) Y <- mvrnorm(1, rep(mu, n), Sigma) op <- par(mfrow = c(2, 2)) image(seq_len(n), seq_len(n), D, main = "D", ylab="i", xlab="i") image(seq_len(n), seq_len(n), Sigma, main="Sigma", ylab="i", xlab="i") Distance <- seq(0, m * sqrt(2), by = 0.1) plot(Distance, exp(-lambda*Distance), type = "l") image(seq_len(m), seq_len(m), matrix(Y, m, m), main = "Y", ylab="y", xlab="x") par(op) ``` Bayesian analysis: ```{r} library(dclone) model <- custommodel("model { for (i in 1:n) { for (j in 1:n) { Sigma[i,j] <- sigma_sq * exp(-lambda*D[i,j]) } mu_vec[i] <- mu } Y[1:n] ~ dmnorm(mu_vec, invSigma) invSigma <- inverse(Sigma) log_sigma ~ dnorm(0, 0.001) sigma_sq <- exp(log_sigma)^2 mu ~ dnorm(0, 0.1) lambda ~ dgamma(1, 0.1) }") dat <- list(Y = Y, n = n, D = D) fit <- jags.fit(data = dat, params = c("mu", "sigma_sq","lambda"), model = model, n.iter = 1000) summary(fit) ``` DC: replicating the whole experiment $K$ times (i.e. clones are independent, and identical in terms of within-clone dependence structure) ```{r} library(dclone) model <- custommodel("model { for (k in 1:K) { for (i in 1:n) { for (j in 1:n) { Sigma[i,j,k] <- sigma_sq * exp(-lambda*D[i,j]) } mu_vec[i,k] <- mu # mu_vec does not really require cloning } Y[1:n,k] ~ dmnorm(mu_vec[1:n,k], invSigma[1:n,1:n,k]) invSigma[1:n,1:n,k] <- inverse(Sigma[1:n,1:n,k]) } log_sigma ~ dnorm(0, 0.001) sigma_sq <- exp(log_sigma)^2 mu ~ dnorm(0, 0.1) lambda ~ dgamma(1, 0.1) }") dat <- list(Y = dcdim(data.matrix(Y)), n = n, D = D, K=1) dcfit <- dc.fit(data = dat, params = c("mu", "sigma_sq","lambda"), model = model, n.iter = 1000, n.clones=c(1,2), multiply="K", unchanged=c("n", "D")) summary(dcfit) dcdiag(dcfit) ``` Inverse Wishart prior, $\sigma^{2}$ and $\lambda$ is hard to recover (it requires post processing the posterior estimates, i.e. monitor the whole `invSigma` matrix or its inverse): ```{r} library(dclone) model <- custommodel("model { for (i in 1:n) { mu_vec[i] <- mu } Y[1:n] ~ dmnorm(mu_vec, invSigma) invSigma[1:n,1:n] ~ dwish(R[1:n,1:n], n) mu ~ dnorm(0, 0.1) }") dat <- list(Y = Y, n = n, R = diag(1, n, n)) fit <- jags.fit(data = dat, params = "mu", model = model, n.iter = 1000) summary(fit) ``` DC for inverse Wishart parametrization: ```{r} library(dclone) model <- custommodel("model { for (i in 1:n) { mu_vec[i] <- mu } for (k in 1:K) { Y[1:n,k] ~ dmnorm(mu_vec[1:n], invSigma[1:n,1:n,k]) invSigma[1:n,1:n,k] ~ dwish(R[1:n,1:n], n) } mu ~ dnorm(0, 0.1) }") dat <- list(Y = dcdim(data.matrix(Y)), n = n, R = diag(1, n, n), K=1) dcfit <- dc.fit(data = dat, params = "mu", model = model, n.iter = 1000, n.clones=c(1,2), multiply="K", unchanged=c("n", "R")) summary(dcfit) dcdiag(dcfit) ``` Correlation ($\rho$) based parametrization. Try different values for $\rho$. Same model as above, but now $\mathbf{\Sigma}$ is defined as $\sigma^{2} \rho^{\mathbf{D}})$. ```{r} set.seed(2345) library(MASS) mu <- 5 sigma_sq <- 1 rho <- 0.8 ## set up an m x m square lattice m <- 10 xy <- expand.grid(x=seq_len(m), y=seq_len(m)) n <- nrow(xy) D <- as.matrix(dist(xy)) Sigma <- sigma_sq * rho^D Y <- mvrnorm(1, rep(mu, n), Sigma) op <- par(mfrow = c(2, 2)) image(seq_len(n), seq_len(n), D, main = "D", ylab="i", xlab="i") image(seq_len(n), seq_len(n), Sigma, main="Sigma", ylab="i", xlab="i") Distance <- seq(0, m * sqrt(2), by = 0.1) plot(Distance, rho^Distance, type = "l") image(seq_len(m), seq_len(m), matrix(Y, m, m), main = "Y", ylab="y", xlab="x") par(op) ``` Bayesian analysis: ```{r} library(dclone) model <- custommodel("model { for (i in 1:n) { for (j in 1:n) { Sigma[i,j] <- sigma_sq * rho^D[i,j] } mu_vec[i] <- mu } Y[1:n] ~ dmnorm(mu_vec, invSigma) invSigma <- inverse(Sigma) log_sigma ~ dnorm(0, 0.001) sigma_sq <- exp(log_sigma)^2 mu ~ dnorm(0, 0.1) rho ~ dunif(0, 0.999) }") dat <- list(Y = Y, n = n, D = D) fit <- jags.fit(data = dat, params = c("mu", "sigma_sq","rho"), model = model, n.iter = 1000) summary(fit) ``` DC: ```{r} model <- custommodel("model { for (k in 1:K) { for (i in 1:n) { for (j in 1:n) { Sigma[i,j,k] <- sigma_sq * rho^D[i,j] } mu_vec[i,k] <- mu # mu_vec does not really require cloning } Y[1:n,k] ~ dmnorm(mu_vec[1:n,k], invSigma[1:n,1:n,k]) invSigma[1:n,1:n,k] <- inverse(Sigma[1:n,1:n,k]) } log_sigma ~ dnorm(0, 0.001) sigma_sq <- exp(log_sigma)^2 mu ~ dnorm(0, 0.1) rho ~ dunif(0, 0.999) }") dat <- list(Y = dcdim(data.matrix(Y)), n = n, D = D, K=1) dcfit <- dc.fit(data = dat, params = c("mu", "sigma_sq","rho"), model = model, n.iter = 1000, n.clones=c(1,2), multiply="K", unchanged=c("n", "D")) summary(dcfit) dcdiag(dcfit) ``` ## Cluster sampling design, Poisson-Lognormal GLMM We assume that points within clusters are dependent (i.e. close to each other to assume high dependence), but clusters are independent (i.e. far apart to assume independence). We assume each cluster has same number of points for simplicity, but number of points within cluster can vary. The model for any given cluster is defined as: $(Y_{ij} \mid \lambda_i) \sim Poisson(\lambda_{i})$, $i = 1, 2, \ldots, n$, $j = 1, 2, \ldots, m$, $log(\lambda_i) = \alpha_i + \mu$, and $\mathbf{\alpha_i} \sim MVN(\mathbf{0}, \mathbf{\Sigma})$. $\mathbf{\Sigma}$ is the $m$-by-$m$ variance covariance matrix for cluster $i$. Diagonal elements are defined as $\sigma^{2}$, off-diagonal elements are defined as $\sigma^{2} \rho$. ```{r} set.seed(1234) library(MASS) ## total sample size is n x m m <- 5 # number of points in a cluster n <- 25 # number of clusters mu <- 1.6 # global mean on log scale sigma_sq <- 1 # global variance rho <- 0.8 ## variance-covariance matrix for a single cluster Sigma1 <- sigma_sq * diag(1, m, m) + sigma_sq * rho * (1 - diag(1, m, m)) ## the full variance covariance matrix is 'block-diagonal' ## which means it is filled with zeros across clusters alpha <- matrix(0, n, m) for (i in 1:n) { alpha[i,] <- mvrnorm(1, rep(0, m), Sigma1) } Y <- rpois(n*m, exp(as.numeric(alpha) + mu)) dim(Y) <- dim(alpha) Y boxplot(t(Y), xlab="Clusters", ylab="Y", col="tomato") points(1:n, rowMeans(Y), pch=4, col=4) ``` Bayesian analysis: ```{r} library(dclone) model <- custommodel("model { for (i in 1:n) { for (j in 1:m) { Y[i,j] ~ dpois(lambda[i,j]) lambda[i,j] <- exp(alpha[i,j] + mu) } alpha[i,1:m] ~ dmnorm(zeros, invSigma) } Sigma <- sigma_sq * R + sigma_sq * rho * (1 - R) invSigma <- inverse(Sigma) log_sigma ~ dnorm(0, 0.001) sigma_sq <- exp(log_sigma)^2 mu ~ dnorm(0, 0.1) rho ~ dunif(0, 0.999) }") dat <- list(Y = Y, n = n, m = m, R = diag(1, m, m), zeros = rep(0, m)) fit <- jags.fit(data = dat, params = c("mu", "sigma_sq","rho"), model = model, n.iter = 1000) summary(fit) ``` DC: cloning the whole dat set which means more independent clusters (i.e. not increased levels of replication within cluster) ```{r} library(dclone) model <- custommodel("model { for (k in 1:K) { for (i in 1:n) { for (j in 1:m) { Y[i,j,k] ~ dpois(lambda[i,j,k]) lambda[i,j,k] <- exp(alpha[i,j,k] + mu) } alpha[i,1:m,k] ~ dmnorm(zeros, invSigma) } } Sigma <- sigma_sq * R + sigma_sq * rho * (1 - R) invSigma <- inverse(Sigma) log_sigma ~ dnorm(0, 0.001) sigma_sq <- exp(log_sigma)^2 mu ~ dnorm(0, 0.1) rho ~ dunif(0, 0.999) }") dat <- list(Y = dcdim(array(Y, c(dim(Y), 1))), n = n, m = m, R = diag(1, m, m), zeros = rep(0, m), K=1) dcfit <- dc.fit(data = dat, params = c("mu", "sigma_sq","rho"), model = model, n.iter = 1000, n.clones=c(1,2), unchanged = c("n", "m", "R", "zeros"), multiply="K") summary(dcfit) dcdiag(dcfit) ``` ## Binomial GLMM for clustered data ```{r} set.seed(1234) library(MASS) ## total sample size is n x m m <- 5 # number of points in a cluster n <- 25 # number of clusters mu <- 0 # global mean on logit scale sigma_sq <- 1 # global variance rho <- 0.8 ## variance-covariance matrix for a single cluster Sigma1 <- sigma_sq * diag(1, m, m) + sigma_sq * rho * (1 - diag(1, m, m)) ## the full variance covariance matrix is 'block-diagonal' ## which means it is filled with zeros across clusters alpha <- matrix(0, n, m) for (i in 1:n) { alpha[i,] <- mvrnorm(1, rep(0, m), Sigma1) } Y <- rbinom(n*m, 1, plogis(as.numeric(alpha) + mu)) dim(Y) <- dim(alpha) Y boxplot(t(Y), xlab="Clusters", ylab="Y", col="tomato") points(1:n, rowMeans(Y), pch=4, col=4) ``` Bayesian analysis: ```{r} library(dclone) model <- custommodel("model { for (i in 1:n) { for (j in 1:m) { Y[i,j] ~ dbern(p[i,j]) p[i,j] <- ilogit(alpha[i,j] + mu) } alpha[i,1:m] ~ dmnorm(zeros, invSigma) } Sigma <- sigma_sq * R + sigma_sq * rho * (1 - R) invSigma <- inverse(Sigma) log_sigma ~ dnorm(0, 0.001) sigma_sq <- exp(log_sigma)^2 mu ~ dnorm(0, 0.1) rho ~ dunif(0, 0.999) }") dat <- list(Y = Y, n = n, m = m, R = diag(1, m, m), zeros = rep(0, m)) fit <- jags.fit(data = dat, params = c("mu", "sigma_sq","rho"), model = model, n.iter = 1000) summary(fit) ``` DC: ```{r} library(dclone) model <- custommodel("model { for (k in 1:K) { for (i in 1:n) { for (j in 1:m) { Y[i,j,k] ~ dbern(p[i,j,k]) p[i,j,k] <- ilogit(alpha[i,j,k] + mu) } alpha[i,1:m,k] ~ dmnorm(zeros, invSigma) } } Sigma <- sigma_sq * R + sigma_sq * rho * (1 - R) invSigma <- inverse(Sigma) log_sigma ~ dnorm(0, 0.001) sigma_sq <- exp(log_sigma)^2 mu ~ dnorm(0, 0.1) rho ~ dunif(0, 0.999) }") dat <- list(Y = dcdim(array(Y, c(dim(Y), 1))), n = n, m = m, R = diag(1, m, m), zeros = rep(0, m), K=1) dcfit <- dc.fit(data = dat, params = c("mu", "sigma_sq","rho"), model = model, n.iter = 1000, n.clones=c(1,2), unchanged = c("n", "m", "R", "zeros"), multiply="K") summary(dcfit) dcdiag(dcfit) ```