# Author: Filip Ekström Kelvinius, Statistics and Machine Learning, Linköping university, Sweden # e-mail: filip.ekstrom@liu.se geometric_likelihood= function(x, theta) { n = length(x) return(theta^n *(1-theta)^(sum(x) - n)) } beta_prior = function(theta, alpha, beta) { return(dbeta(theta, alpha, beta)) } beta_posterior = function(theta, alpha_prior, beta_prior, x) { n = length(x) return(dbeta(theta, n+alpha_prior, sum(x) - n + beta_prior)) } gen_data = function(n, theta, alpha, beta) { # function to generate data data = rgeom(n, theta) + 1 # +1 to have same interpretation of Geometric distribution as in course plot_all(data, alpha, beta, main="Manipulating plot") } plot_all = function(x, alpha_prior, beta_prior, main="") { # empty plot plot(1, type="n", xlab="", ylab="", xlim=c(0, 1), ylim=c(0, 5), main=main) theta = seq(0, 1, 0.001) # plot prior prior = beta_prior(theta, alpha_prior, beta_prior) lines(theta, prior, col='green') # plot scaled likelihood (scaling to enable) likelihood = geometric_likelihood(x, theta) likelihood_rescaled = likelihood/max(likelihood) lines(theta, likelihood_rescaled, col='red') # plot posterior posterior = beta_posterior(theta, alpha_prior, beta_prior, x) lines(theta, posterior, col='blue') # legend legend(0.7, 5, legend=c(paste("Prior, alpha =", alpha_prior, ", beta =", beta_prior), "Likelihood (scaled)", "Posterior"), col=c("green", "red", "blue"), lty=1, cex=0.8 ) } # data from exercise 10.32 data = c(2,3,5,8,2) # Theta is probability that an event occur # For example, could be probability that we get a 6 when rolling a dice # Or data is the number of times it takes to get a 6, and it therefore follows a geometric distribution # Prior should encode what you think about theta before observing any data (your prior belief) # Let's compare some different choices # Beta(1,1) = Uniform distribution - we don't know anything about theta plot_all(data, 1,1, "Uniform prior") # Maybe we talk with someone with experience of these particular dice, and they give us some advice # They think it is more likely to be around 0.5: plot_all(data, 3,3, "Prior around 0.5") # They think it is more likely to be a lower value (around 0.2): plot_all(data, 2,5, "Prior around 0.2") # They think it is more likely to be a higher value (around 0.8) plot_all(data, 5, 2, "Prior around 0.8") # What happens with the prior, likelihood and posterior in the different cases? # If you would compare using the posterior to estimate theta and using a maximum likelihood estimation, # what would be different? # install.packages("manipulate") # uncomment to install package manipulate library(manipulate) # experiment yourself with different priors, and using different amount of data # what happens to the posterior when you have a small or large amount of data? # You might have to run twice to get the possibility to manilpulate the values manipulate( gen_data(n, theta, alpha, beta), alpha = slider(0, 10, step=0.1, initial=1, label="alpha"), beta = slider(0, 10, step=0.1, initial=1, label="beta"), n = slider(1,300, step=10, initial=10, label="Number of datapoints"), theta = slider(0, 1, step=0.05, initial=0.2, label="theta") )