#' Introduction to MCMC 1: Estimating a posterior binomial probability
#' Clinic on the Meaningful Modeling of Epidemiological Data
#' International Clinics on Infectious Disease Dynamics and Data (ICI3D) Program
#' African Institute for Mathematical Sciences, Muizenberg, RSA
#' Steve Bellan 2012, 2015
#' Carl Pearson 2025
#'
#' By the end of this tutorial you should be able to:
#' * Write a likelihood function for binomially distributed data
#' * Explain the Metropolis-Hastings algorithm
#' * Explain how the proposal distribution affects MCMC convergence
#' * Assess MCMC convergence with the Gelman-Rubin diagnostic
#'   and trace plots

library(ggplot2)
library(dplyr)
library(tidyr)

#' Part 1: Sample Data
#' Imagine there is a population of individuals with a 30% prevalence of some
#' disease. If you were to draw a 100-person sample, you could estimate the
#' population prevalence from that sample. You may recall using other techniques
#' to make that estimate in this workshop or other courses; here, we are going
#' to use the Bayesian perspective with MCMC to estimate the posterior
#' probability distribution of the prevalence from this "data" and a prior
#' distribution (informative or uninformative) for the prevalence.

true_prevalence <- 0.3
sample_size <- 100
sample_positive <- rbinom(1, sample_size, true_prevalence)
sample_prevalence <- sample_positive / sample_size

#' @question How do `true_prevalence` and `sample_prevalence` compare? What
#' could you change to make them more likely to be close for any given draw?

#' Part 2: Prior Estimate
#' Now we need to specify our prior probability distribution. This corresponds
#' to our belief *before* we conducted the study regarding prevalence in the
#' population. For example, there might be a *global* estimate of the
#' prevalence, or a previous local study from some time ago. Or if this disease
#' has a high mortality rate, we might have a reasonable guess about how many
#' people experience it. However, we might also have little to no information,
#' and thus choose an _uninformative_ prior, meaning we assume as wide a range
#' of values as plausible.
#'
#' Our parameter of interest is prevalence, which is bounded between 0 and 1.
#' The `beta` distribution matches these bounds and is thus commonly used as
#' the probability distribution for parameters that are themselves
#' probabilities. We will use it in this example.
#'
#' One reminder: for numerical stability reasons, we generally prefer to work on
#' the `log` scale for likelihood calculations.
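#' As a quick, hypothetical illustration of that reminder (this side
#' calculation is not used anywhere else in the tutorial; `stability_demo` is
#' our own name): the joint likelihood of many individual observations
#' underflows to zero when multiplied on the natural scale, but remains
#' representable as a sum of log terms.
stability_demo <- rbinom(5000, 1, true_prevalence) # 5000 individual 0/1 outcomes
prod(dbinom(stability_demo, size = 1, prob = 0.3)) # underflows to exactly 0
sum(dbinom(stability_demo, size = 1, prob = 0.3, log = TRUE)) # finite log-likelihood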
# ignore these for now - come back and change them later to change your prior
default_shape1 <- 2
default_shape2 <- 10

#' @param prevalence a probability vector; the estimate(s) we wish to evaluate
#' @param shape1 a positive numeric; see [stats::dbeta()]
#' @param shape2 a positive numeric; see [stats::dbeta()]
#'
#' @details
#' `shape1` and `shape2` are related to the mean and variance of the prior by
#' $$
#' mean = shape1 / (shape1 + shape2)
#' variance = shape1 * shape2 / ((shape1 + shape2)^2 * (shape1 + shape2 + 1))
#' $$
#'
beta_prior <- function(
  prevalence,
  shape1 = default_shape1, ## for a 30% belief, adjust shape1 and shape2 to get an estimate
  shape2 = default_shape2  ## of the mean other than 50% and a tighter confidence
) {
  return(dbeta(prevalence, shape1 = shape1, shape2 = shape2, log = TRUE))
}

#' @param prevalence a probability vector; the estimate(s) we wish to evaluate
#' @param data a list-like structure with `observed` and `size` elements; by
#'   default, set to our sample from earlier
#'
#' @details
#' Recall: the likelihood of a particular prevalence is the probability of the
#' observed outcome out of a particular size sample, if that prevalence were
#' true.
likelihood <- function(
  prevalence,
  data = list(size = sample_size, observed = sample_positive)
) {
  return(with(
    data,
    dbinom(observed, size = size, prob = prevalence, log = TRUE)
  ))
}

#' We generally need both the prior and the likelihood, so let's define
#' their sum for convenience.
#'
#' n.b. we use the combined parameters of [likelihood()] and [beta_prior()]
likelihood_and_prior <- function(
  prevalence,
  shape1 = default_shape1, shape2 = default_shape2,
  data = list(size = sample_size, observed = sample_positive)
) {
  return(beta_prior(prevalence, shape1, shape2) + likelihood(prevalence, data))
}

#' @question What did you change shape1 and shape2 to? Why?

#' Part 3: Visualizing the Prior and Likelihood
#' Let's examine some plots of our prior and likelihood functions. In the plots
#' below, we transform most of our functions' results back from the `log` scale
#' with `|> exp()`.
#'
#' To do so, we'll make a data structure containing all the views we want to see
#' and then plot them.
views <- factor(
  c(
    "Prior", "Likelihood Only",
    "Likelihood & Prior", "Log(Likelihood & Prior)"
  ),
  levels = c(
    "Prior", "Likelihood Only",
    "Likelihood & Prior", "Log(Likelihood & Prior)"
  ),
  ordered = TRUE
)

plot_df <- expand.grid(prevalence = seq(0, 1, by = 0.001), view = views) |>
  mutate(y = case_when(
    view == views[1] ~ exp(beta_prior(prevalence)),
    view == views[2] ~ exp(likelihood(prevalence)),
    view == views[3] ~ exp(likelihood_and_prior(prevalence)),
    view == views[4] ~ likelihood_and_prior(prevalence)
  ))

ggplot(plot_df) + aes(prevalence, y) +
  geom_line() +
  facet_wrap(~ view, scales = "free_y", nrow = 2) +
  theme_minimal()

#' Part 4: Run Metropolis-Hastings Markov Chain Monte Carlo (MCMC)
#' For the beta-prior, binomial-likelihood, single-data-point case, one can
#' directly write down the posterior distribution. But real problems are rarely
#' as simple. This exercise demonstrates how to use MCMC techniques on a simple
#' problem so you can see how they work; as you will see in other exercises,
#' the same techniques can also solve more complicated problems.
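#' As a brief aside before running the sampler (a sketch that relies on the
#' standard beta-binomial conjugacy result; the `analytic_*` names are our own
#' additions, not part of the exercise): for this model, the posterior can be
#' written down directly as Beta(shape1 + observed, shape2 + size - observed).
#' Keeping it around lets you check the MCMC output against it later if you like.
analytic_shape1 <- default_shape1 + sample_positive
analytic_shape2 <- default_shape2 + sample_size - sample_positive
analytic_shape1 / (analytic_shape1 + analytic_shape2) # the analytic posterior mean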
# qlogis and plogis are logit and inverse logit functions, respectively
iterations <- 1000 # how many steps in the chain?
# pre-allocate storage: logit(prevalence), prevalence, and log prior + log likelihood
chain <- array(dim = c(iterations + 1, 3))
# proposer for the next iteration: a random walk on the logit scale
proposer <- function(previous, sd = 0.1) rnorm(1, previous, sd)

logit_initial_prevalence <- runif(1, qlogis(0.01), qlogis(0.99))
initial_prevalence <- plogis(logit_initial_prevalence)
chain[1, ] <- c(
  logit_initial_prevalence, initial_prevalence,
  likelihood_and_prior(initial_prevalence)
)

for (step in seq_len(iterations)) {
  proposal <- proposer(chain[step, 1])
  prop_prev <- plogis(proposal)
  ll_proposal <- likelihood_and_prior(prop_prev)
  # the random walk is symmetric on the logit scale, so the acceptance ratio
  # includes the Jacobian of the logit transform, prevalence * (1 - prevalence),
  # which keeps the posterior of prevalence itself as the target
  metro_hastings_ratio <- exp(
    (ll_proposal + log(prop_prev) + log(1 - prop_prev)) -
      (chain[step, 3] + log(chain[step, 2]) + log(1 - chain[step, 2]))
  )
  if (runif(1) < metro_hastings_ratio) { # accept proposal
    chain[step + 1, ] <- c(proposal, prop_prev, ll_proposal)
  } else { # reject proposal
    chain[step + 1, ] <- chain[step, ]
  }
}

chain_df <- as.data.frame(chain) |>
  setNames(c("logit_prevalence", "prevalence", "likelihood_and_prior")) |>
  within({ iteration <- seq_along(prevalence) }) |>
  pivot_longer(cols = -c(iteration))

ggplot(chain_df) + aes(iteration, value) +
  geom_line() +
  facet_grid(name ~ ., scales = "free_y") +
  theme_minimal()

#' Part 5: Questions!
#' @question What is the Gelman-Rubin diagnostic?
#' @hint you might need to look it up!
#'
#' @question If you wanted to look at the G-R diagnostic, what steps would you
#' take?
#'
#' @question Bonus: if you have time, start taking those steps (one possible
#' starting point is sketched below) by:
#' - generating multiple independent chains
#' - plotting those chain trajectories
#' - plotting the corresponding posterior distributions
#' - calculating the Gelman-Rubin diagnostic
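#' One possible starting point for the bonus (a sketch only; the `run_chain()`
#' helper and the use of the `coda` package, which you may need to install
#' first, are our choices rather than requirements of the exercise):
run_chain <- function(iterations = 1000) {
  # the same sampler as above, wrapped in a function so it can be repeated
  out <- matrix(NA_real_, nrow = iterations + 1, ncol = 3)
  out[1, 1] <- runif(1, qlogis(0.01), qlogis(0.99)) # random start (logit scale)
  out[1, 2] <- plogis(out[1, 1])
  out[1, 3] <- likelihood_and_prior(out[1, 2])
  for (step in seq_len(iterations)) {
    proposal <- proposer(out[step, 1])
    prop_prev <- plogis(proposal)
    ll_prop <- likelihood_and_prior(prop_prev)
    # same acceptance rule as above, including the logit-scale Jacobian terms
    log_ratio <- (ll_prop + log(prop_prev) + log(1 - prop_prev)) -
      (out[step, 3] + log(out[step, 2]) + log(1 - out[step, 2]))
    if (runif(1) < exp(log_ratio)) {
      out[step + 1, ] <- c(proposal, prop_prev, ll_prop)
    } else {
      out[step + 1, ] <- out[step, ]
    }
  }
  return(out)
}

# several independent chains; keep only the prevalence column for diagnostics
chains <- do.call(
  coda::mcmc.list,
  lapply(1:4, function(i) coda::mcmc(run_chain()[, 2]))
)
plot(chains)              # trace plots and posterior densities for each chain
coda::gelman.diag(chains) # the Gelman-Rubin potential scale reduction factor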