# script for LCA in diagnostic testing
#
# uses Gibbs steps to estimate unknown prevalence, sensitivity and specificity
# from tabulated test-pattern data.

# Data

# number of populations
P <- 2
# number of tests
T <- 2
# counts, in test-pattern order ++, +-, -+, -- within each population,
# i.e. 1++, 1+-, 1-+, 1--, 2++, 2+-, 2-+, 2--
counts <- c(500,100, 50,400,400,120, 10,100)

# Priors, assuming Beta distributions with parameters alpha, beta.
p_alpha <- rep(1, P)
p_beta  <- rep(1, P)
s_alpha <- rep(1, T)
s_beta  <- rep(1, T)
c_alpha <- rep(1, T)
c_beta  <- rep(1, T)

# MCMC control
iters  <- 10000
burnin <- 1000
thin   <- 10

# Now run the MCMC model...

# Simple check that the data were entered correctly
if (length(counts) != P*(2^T)) {
  stop("counts must be of length (number of populations)*2^(number of tests)")
}

# Setup matrices for y (observed counts) and m (test-pattern indicators:
# m[t,k] == 1 iff test t is positive in pattern k)
y <- matrix(counts, nrow=P, ncol=2^T, byrow=TRUE)
m <- matrix(0, nrow=T, ncol=2^T)
for (i in 1:T) {
  for (k in 1:(2^T)) {
    # does 2^T-k have bit T-i set?
    r <- (2^T-k) %/% (2^(T-i))
    if (r %% 2) m[i,k] <- 1
  }
}

# initial values
p <- rep(0.5, P) # prevalence
s <- rep(0.5, T) # test sensitivity
c <- rep(0.5, T) # test specificity
# (note: this masks the built-in c(), which is safe here as R looks up
# functions separately from data objects)

# latent true positives for each y
x <- y

# posterior
post <- matrix(NA, iters/thin, P+2*T)
post_names <- rep("", P+2*T)
for (i in 1:P) post_names[i]     <- paste("Pop", i, "Prev", sep="_")
for (i in 1:T) post_names[P+i]   <- paste("Test", i, "Sens", sep="_")
for (i in 1:T) post_names[P+T+i] <- paste("Test", i, "Spec", sep="_")
colnames(post) <- post_names

# run the MCMC
j <- 1
for (l in 1:(iters + burnin)) {
  # Gibbs steps:
  # 1. Sample from x_{iM}
  for (i in 1:P) {
    for (k in 1:(2^T)) {
      p1 <- p[i]
      p2 <- 1 - p[i]
      for (t in 1:T) {
        if (m[t,k]) {
          p1 <- p1*s[t]
          p2 <- p2*(1-c[t])
        } else {
          p1 <- p1*(1-s[t])
          p2 <- p2*c[t]
        }
      }
      # p1/(p1+p2) is the conditional probability that an individual in
      # population i showing pattern k is a true positive
      x[i,k] <- rbinom(1, y[i,k], p1/(p1+p2))
    }
  }
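  # Steps 2-4 exploit Beta-Binomial conjugacy: conditional on the latent split
  # of each cell count into x (true positives) and y - x (true negatives), the
  # full conditionals of p, s and c are all Beta, with the prior parameters
  # incremented by the relevant counts (see the NOTES at the end of this
  # script for the derivation).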
  # 2. Sample from p_i
  for (i in 1:P) {
    # sx <- 0
    # sy <- 0
    # for (k in 1:(2^T)) {
    #   sx <- sx + x[i,k]
    #   sy <- sy + y[i,k]
    # }
    # optimisation:
    sx <- sum(x[i,])
    sy <- sum(y[i,])
    p[i] <- rbeta(1, p_alpha[i] + sx, p_beta[i] + sy - sx)
  }
  # 3. Sample from s_t
  for (t in 1:T) {
    alpha <- s_alpha[t]
    beta  <- s_beta[t]
    for (i in 1:P) {
      # optimisation of:
      # for (k in 1:(2^T)) {
      #   if (m[t,k]) {
      #     alpha <- alpha + x[i,k]
      #   } else {
      #     beta <- beta + x[i,k]
      #   }
      # }
      alpha <- alpha + sum(m[t,]*x[i,])
      beta  <- beta + sum((1-m[t,])*x[i,])
    }
    s[t] <- rbeta(1, alpha, beta)
  }
  # 4. Sample from c_t
  for (t in 1:T) {
    alpha <- c_alpha[t]
    beta  <- c_beta[t]
    for (i in 1:P) {
      # optimisation of:
      # for (k in 1:(2^T)) {
      #   if (m[t,k]) {
      #     beta <- beta + y[i,k] - x[i,k]
      #   } else {
      #     alpha <- alpha + y[i,k] - x[i,k]
      #   }
      # }
      alpha <- alpha + sum((1-m[t,])*(y[i,]-x[i,]))
      beta  <- beta + sum(m[t,]*(y[i,]-x[i,]))
    }
    c[t] <- rbeta(1, alpha, beta)
  }
  # store sample
  if (l > burnin && (l-burnin) %% thin == 0) {
    post[j,] <- c(p, s, c)
    j <- j + 1
  }
}

# Plot posteriors
par(mfrow=c(3,2))

plot(post[,1], type="l", ylim=c(0,1), main="Prevalence traces")
if (P > 1) {
  for (i in 2:P) lines(post[,i], col=i)
}

d <- list(); xlim <- NULL; ylim <- NULL
for (i in 1:P) {
  d[[i]] <- density(post[,i])
  xlim <- range(xlim, d[[i]]$x)
  ylim <- range(ylim, d[[i]]$y)
}
plot(d[[1]], ylim=ylim, xlim=xlim, main="Prevalence density")
if (P > 1) {
  for (i in 2:P) lines(d[[i]], col=i)
}
# and the priors :)
x <- seq(xlim[1], xlim[2], length.out=100)
for (i in 1:P) lines(x, dbeta(x, p_alpha[i], p_beta[i]), col=i, lty="dotted")

plot(post[,P+1], type="l", ylim=c(0,1), main="Sensitivity traces")
if (T > 1) {
  for (t in 2:T) lines(post[,P+t], col=t)
}

d <- list(); xlim <- NULL; ylim <- NULL
for (t in 1:T) {
  d[[t]] <- density(post[,P+t])
  xlim <- range(xlim, d[[t]]$x)
  ylim <- range(ylim, d[[t]]$y)
}
plot(d[[1]], ylim=ylim, xlim=xlim, main="Sensitivity density")
if (T > 1) {
  for (t in 2:T) lines(d[[t]], col=t)
}
# and the priors :)
x <- seq(xlim[1], xlim[2], length.out=100)
for (t in 1:T) lines(x, dbeta(x, s_alpha[t], s_beta[t]), col=t, lty="dotted")

plot(post[,P+T+1], type="l", ylim=c(0,1), main="Specificity traces")
if (T > 1) {
  for (t in 2:T) lines(post[,P+T+t], col=t)
}

d <- list(); xlim <- NULL; ylim <- NULL
for (t in 1:T) {
  d[[t]] <- density(post[,P+T+t])
  xlim <- range(xlim, d[[t]]$x)
  ylim <- range(ylim, d[[t]]$y)
}
plot(d[[1]], ylim=ylim, xlim=xlim, main="Specificity density")
if (T > 1) {
  for (t in 2:T) lines(d[[t]], col=t)
}
# and the priors :)
x <- seq(xlim[1], xlim[2], length.out=100)
for (t in 1:T) lines(x, dbeta(x, c_alpha[t], c_beta[t]), col=t, lty="dotted")

# summary
tab <- apply(post, 2, function(x) {
  c(mean(x), sd(x), quantile(x, c(0.025, 0.975)))
})
rownames(tab) <- c("mean", "sd", "2.5%", "97.5%")
print(tab)
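# Optional sanity check -- a minimal sketch, not part of the analysis above.
# Simulate counts from the model under assumed "true" parameter values (the
# sim_* values below are purely illustrative), then set `counts` at the top
# of the script to the simulated values and re-run, checking that the
# posteriors recover the truth.
sim_p <- c(0.8, 0.3)    # assumed true prevalences (one per population)
sim_s <- c(0.90, 0.95)  # assumed true sensitivities (one per test)
sim_c <- c(0.95, 0.85)  # assumed true specificities (one per test)
sim_n <- rep(1000, P)   # individuals sampled per population
sim_counts <- matrix(0, nrow=P, ncol=2^T)
for (i in 1:P) {
  prob <- numeric(2^T)
  for (k in 1:(2^T)) {
    # cell probability for pattern k, as in the likelihood:
    # p*prod(s^M (1-s)^(1-M)) + (1-p)*prod((1-c)^M c^(1-M))
    a <- sim_p[i]
    b <- 1 - sim_p[i]
    for (t in 1:T) {
      if (m[t,k]) {
        a <- a*sim_s[t]
        b <- b*(1-sim_c[t])
      } else {
        a <- a*(1-sim_s[t])
        b <- b*sim_c[t]
      }
    }
    prob[k] <- a + b
  }
  sim_counts[i,] <- rmultinom(1, sim_n[i], prob)
}
# row-major flattening matches the entry order of `counts`
print(as.vector(t(sim_counts)))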
# NOTES ON HOW IT ALL WORKS...

# Let M be a binary string (e.g. M = 01001) representing a result pattern across
# multiple tests, where a 1 specifies a positive test and a 0 a negative one.
# Let Z_T be the set of all 2^T such strings of length T, and write M_t for the
# t-th bit of M (so M_t = 1 if M contains a 1 at position t, and 0 otherwise).
# Let y_{iM} be the number of individuals with test pattern M in population i.
# Let p_i be the prevalence in population i.
# Let s_t be the sensitivity of test t.
# Let c_t be the specificity of test t.
#
# Then the likelihood of y given p, s and c may be written
#
# L(y | p, s, c) = \prod_{i=1}^P \prod_{M \in Z_T} [p_i \prod_{t=1}^T s_t^{M_t} (1-s_t)^{1-M_t}
#                    + (1-p_i) \prod_{t=1}^T (1-c_t)^{M_t} c_t^{1-M_t}]^{y_{iM}},
#
# where the sum of two terms in each bracket arises because we do not observe
# the true disease status of any individual.
#
# To proceed, let x_{iM} be the number of true positives that report test pattern M
# in population i. The x_{iM} are unknown, but can be incorporated into the
# likelihood as latent variables to set up an appropriate Gibbs sampler.
# Expanding each bracket with the binomial theorem, the likelihood becomes
#
# L(y | p, s, c) = \prod_{i=1}^P \prod_{M \in Z_T} \sum_{x_{iM}=0}^{y_{iM}} \binom{y_{iM}}{x_{iM}}
#                    [p_i \prod_{t=1}^T s_t^{M_t} (1-s_t)^{1-M_t}]^{x_{iM}}
#                    [(1-p_i) \prod_{t=1}^T (1-c_t)^{M_t} c_t^{1-M_t}]^{y_{iM}-x_{iM}}.
#
# Conditional on the x_{iM}, then,
#
# L(y | p, s, c, x) = \prod_{i=1}^P \prod_{M \in Z_T}
#                       [p_i \prod_{t=1}^T s_t^{M_t} (1-s_t)^{1-M_t}]^{x_{iM}}
#                       [(1-p_i) \prod_{t=1}^T (1-c_t)^{M_t} c_t^{1-M_t}]^{y_{iM}-x_{iM}}
#
#   = \prod_{i=1}^P \prod_{M \in Z_T} p_i^{x_{iM}} (1-p_i)^{y_{iM}-x_{iM}}
#       \prod_{t=1}^T s_t^{M_t x_{iM}} (1-s_t)^{(1-M_t) x_{iM}}
#       \prod_{t=1}^T (1-c_t)^{M_t (y_{iM}-x_{iM})} c_t^{(1-M_t)(y_{iM}-x_{iM})}
#
#   = \prod_{i=1}^P p_i^{\sum_M x_{iM}} (1-p_i)^{\sum_M (y_{iM}-x_{iM})}
#       \prod_{t=1}^T s_t^{\sum_{i,M} M_t x_{iM}} (1-s_t)^{\sum_{i,M} (1-M_t) x_{iM}}
#       \prod_{t=1}^T (1-c_t)^{\sum_{i,M} M_t (y_{iM}-x_{iM})} c_t^{\sum_{i,M} (1-M_t)(y_{iM}-x_{iM})}.
#
# Assuming Beta priors, the conditional posteriors of p, s and c are also Beta:
#
# p_i | s, c, x, y \sim Beta(\sum_M x_{iM} + \alpha_{p_i}, \sum_M (y_{iM}-x_{iM}) + \beta_{p_i})
#
# s_t | p, c, x, y \sim Beta(\sum_i \sum_M M_t x_{iM} + \alpha_{s_t}, \sum_i \sum_M (1-M_t) x_{iM} + \beta_{s_t})
#
# c_t | p, s, x, y \sim Beta(\sum_i \sum_M (1-M_t)(y_{iM}-x_{iM}) + \alpha_{c_t}, \sum_i \sum_M M_t (y_{iM}-x_{iM}) + \beta_{c_t})
#
# Finally, the conditional posterior of each x_{iM} is Binomial:
#
# x_{iM} | p, s, c, y \sim Binomial(y_{iM}, \frac{a_{iM}}{a_{iM} + b_{iM}})
#
# where
#
# a_{iM} = p_i \prod_{t=1}^T s_t^{M_t} (1-s_t)^{1-M_t}
# b_{iM} = (1-p_i) \prod_{t=1}^T (1-c_t)^{M_t} c_t^{1-M_t}
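# Worked example of the conditional for x_{iM} (illustrative numbers, not taken
# from the data above): with T = 2, pattern M = ++ (M_1 = M_2 = 1), p_i = 0.5,
# and s_t = 0.9, c_t = 0.95 for both tests,
#
#   a_{iM} = 0.5 * 0.9 * 0.9   = 0.405
#   b_{iM} = 0.5 * 0.05 * 0.05 = 0.00125
#
# so x_{iM} ~ Binomial(y_{iM}, 0.405/0.40625 = 0.997 approx.): at each sweep,
# nearly every double-positive individual is allocated to the true-positive class.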