{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Maximum Likelihood Estimation of the Student t distribution using EM Algorithm" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "scrolled": false }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "using Plots\n", "using StatsPlots\n", "using Random\n", "using Distributions\n", "using SpecialFunctions\n", "using Statistics\n", "using StatsBase\n", "using Optim\n", "using ReverseDiff: gradient\n", "\n", "Random.seed!(1234)\n", "\n", "function e_eta(x, nu, lambda_, mu)\n", " return (nu + 1)/(nu + lambda_ * (x - mu)^2)\n", "end\n", "\n", "function e_etas(X, nu, lambda_, mu)\n", " return [ e_eta(x, nu, lambda_, mu) for x in X]\n", "end\n", "\n", "function e_log_eta(x, nu, lambda_, mu)\n", " return digamma((nu + 1.0)/2.0) - log((nu + lambda_ * (x - mu)^2)/2.0)\n", "end\n", "\n", "function e_log_etas(X, nu, lambda_, mu)\n", " return [ e_log_eta(x, nu, lambda_, mu) for x in X]\n", "end\n", "\n", "function fit_t(X)\n", " n = length(X)\n", " \n", " # init\n", " mu = median(X)\n", " lambda_ = iqr(X)/2.0\n", " nu = 1.0\n", " \n", " for i in 1:1000\n", " # E step\n", " e_etas_ = e_etas(X, nu, lambda_, mu)\n", " e_log_etas_ = e_log_etas(X, nu[1], lambda_, mu)\n", " \n", " \n", " # M step\n", " # mu\n", " mu = (X' * e_etas_) / sum(e_etas_)\n", " # lambda_\n", " lambda_ = 1.0 / ( (((X .- mu).^2)' * e_etas_) / n)\n", " # nu\n", " function f(nu)\n", " return digamma(nu[1]/2.0) - log(nu[1]/2.0) - (1.0 + sum(e_log_etas_)/n - sum(e_etas_)/n)\n", " end\n", " for j in 1:1000\n", " tmp = nu - f([nu])/gradient(f, [nu])[1]\n", " if !isnan(tmp)\n", " nu = max(tmp, 1e-6)\n", " if abs(f([nu])) < 1e-6\n", " break\n", " end\n", " else\n", " break\n", " end\n", " end\n", " end\n", " \n", " return mu, lambda_, nu\n", "end\n", "\n", "d = Normal(0.0, 1.0)\n", "noise_d = Normal(100.0, 1.0)\n", "data = rand(d, 10)\n", "outlier = rand(noise_d, 1)\n", "X = vcat(data, outlier)\n", "mu, lambda_, nu = fit_t(X)\n", "\n", "scatter(data, zeros(length(data)), label=\"data\")\n", "scatter!(outlier, zeros(length(outlier)), label=\"outlier\")\n", "plot!(fit_mle(Normal, X), label=\"Normal\")\n", "\n", "Xs = Array(range(-100, 120, step=0.1))\n", "function t_pdf(x, mu, lamda_, nu)\n", " return gamma((nu+1)/2)./gamma(nu/2).*sqrt(lambda_/(pi*nu)).*(1 .+ lambda_*(x .- mu).^2/nu).^(-(nu+1)/2)\n", "end\n", "Ys = t_pdf(Xs, mu, lambda_, nu)\n", "plot!(Xs, Ys.*0.035, label=\"Student t Distribution\", title=\"MLE of the Student t distribution using EM Algorithm\")" ] } ], "metadata": { "kernelspec": { "display_name": "Julia 1.4.2", "language": "julia", "name": "julia-1.4" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "1.4.2" } }, "nbformat": 4, "nbformat_minor": 4 }