{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Poisson Regression\n",
"\n",
"Gaussian process models can be incredibly flexbile for modelling non-Gaussian data. One such example is in the case of count data which can be modelled with a __Poisson model__ with a latent Gaussian process.\n",
"$$\n",
"\\mathbf{y} \\ | \\ \\mathbf{f} \\sim \\prod_{i=1}^{n} \\frac{\\lambda_i^{y_i}\\exp\\{-\\lambda_i\\}}{y_i!},\n",
"$$\n",
"where $\\lambda_i=\\exp(f_i)$.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
""
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Load the package\n",
"using GaussianProcesses\n",
"\n",
"#Simulate the data\n",
"srand(203617)\n",
"n = 20\n",
"X = collect(linspace(-3,3,n));\n",
"f = 2*cos.(2*X);\n",
"Y = [rand(Distributions.Poisson(exp.(f[i]))) for i in 1:n];\n",
"\n",
"#Plot the data using the Plots.jl package with the GR backend\n",
"using Plots\n",
"gr()\n",
"scatter(X,Y,leg=false, fmt=:png)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"GP Monte Carlo object:\n",
" Dim = 1\n",
" Number of observations = 20\n",
" Mean function:\n",
" Type: GaussianProcesses.MeanZero, Params: Float64[]\n",
" Kernel:\n",
" Type: GaussianProcesses.Mat32Iso, Params: [0.0, 0.0]\n",
" Likelihood:\n",
" Type: GaussianProcesses.PoisLik, Params: Any[]\n",
" Input observations = \n",
"[-3.0 -2.68421 … 2.68421 3.0]\n",
" Output observations = [3, 3, 1, 0, 0, 0, 0, 0, 3, 4, 7, 3, 1, 0, 0, 1, 0, 3, 4, 4]\n",
" Log-posterior = -65.397"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#GP set-up\n",
"k = Matern(3/2,0.0,0.0) # Matern 3/2 kernel\n",
"l = PoisLik() # Poisson likelihood\n",
"gp = GP(X, vec(Y), MeanZero(), k, l)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"BasicMCJob:\n",
" Variable [1]: p (BasicContMuvParameter)\n",
" GenericModel: 1 variables, 0 dependencies (directed graph)\n",
" HMC sampler: number of leaps = 10, leap step = 0.1\n",
" VanillaMCTuner: period = 100, verbose = false\n",
" BasicMCRange: number of steps = 49991, burnin = 10000, thinning = 10"
]
}
],
"source": [
"set_priors!(gp.k,[Distributions.Normal(-2.0,4.0),Distributions.Normal(-2.0,4.0)])\n",
"\n",
"samples = mcmc(gp;nIter=50000, thin=10, burnin=10000)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#Sample predicted values\n",
"xtest = linspace(minimum(gp.X),maximum(gp.X),50);\n",
"ymean = [];\n",
"fsamples = [];\n",
"for i in 1:size(samples,2)\n",
" set_params!(gp,samples[:,i])\n",
" update_target!(gp)\n",
" push!(ymean, predict_y(gp,xtest)[1])\n",
" push!(fsamples,rand(gp,xtest))\n",
"end\n",
"\n",
"#Predictive plots\n",
"\n",
"sd = [std(fsamples[i]) for i in 1:50]\n",
"plot(xtest,mean(ymean),ribbon=2*sd,leg=false, fmt=:png)\n",
"scatter!(X,Y)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 0.6.0",
"language": "julia",
"name": "julia-0.6"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "0.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}