{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Part 1 \n", "\n", " - start with fitting data to a parametric model\n", " \n", " - so we need to create the log-likehood\n", " - let's create some fake data first." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD7CAYAAACRxdTpAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAARq0lEQVR4nO3df6zddX3H8edrVMS5zfLjrmFtXVloNGSZyG5YjWZxdC6AxvKHIzg3GtLk7g+26XSZnf84ly2BZBElW0ga61YWhzLUtVGy2RSM2x+gF0EUquHKxLYp9IqAOuYP9L0/zqfh0N72ntv7i37u85GcnM/3/fl8z/mcb3pf99vP/Z5zUlVIkvryc8s9AUnSwjPcJalDhrskdchwl6QOGe6S1CHDXZI6NFK4J/nzJA8l+VqS25KcleSCJPcmmUryiSRntrEvbdtTrX/DYr4ASdLxZg33JGuBPwPGq+rXgTOAa4AbgZuq6kLgKWBb22Ub8FSr39TGSZKW0Ko5jHtZkp8APw8cBi4D/qD17wL+GrgF2NLaAHcA/5AkdZJ3S5133nm1YcOGuc5dkla0++677ztVNTZT36zhXlWHkvw98G3g/4DPAfcBT1fVc23YQWBta68FDrR9n0vyDHAu8J3hx00yAUwAvPKVr2RycnKur0uSVrQkj52ob5RlmbMZnI1fAPwK8HLg8vlOqqp2VNV4VY2Pjc34i0eSdIpG+YPq7wL/U1XTVfUT4FPA64HVSY6e+a8DDrX2IWA9QOt/BfDkgs5aknRSo4T7t4FNSX4+SYDNwMPA3cDb2pitwO7W3tO2af13nWy9XZK08GYN96q6l8EfRr8MfLXtswN4L/DuJFMM1tR3tl12Aue2+ruB7Yswb0nSSeTFcFI9Pj5e/kFVkuYmyX1VNT5Tn+9QlaQOGe6S1CHDXZI6ZLhLUodG/fiBF60N2z87Y/1bN7x5iWciSS8enrlLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1aNZwT/KqJA8M3b6X5F1JzkmyN8kj7f7sNj5Jbk4yleTBJJcs/suQJA0b5Quyv1FVF1fVxcBvAs8Cn2bwxdf7qmojsI/nvwj7CmBju00AtyzGxCVJJzbXZZnNwDer6jFgC7Cr1XcBV7X2FuDWGrgHWJ3k/AWZrSRpJHMN92uA21p7TVUdbu3HgTWtvRY4MLTPwVZ7gSQTSSaTTE5PT89xGpKkkxk53JOcCbwV+Ldj+6qqgJrLE1fVjqoar6rxsbGxuewqSZrFXM7crwC+XFVPtO0nji63tPsjrX4IWD+037pWkyQtkbmE+9t5fkkGYA+wtbW3AruH6te2q2Y2Ac8MLd9IkpbASF+QneTlwJuAPx4q3wDcnmQb8BhwdavfCVwJTDG4sua6BZutJGkkI4V7Vf0vcO4xtScZXD1z7NgCrl+Q2UmSTonvUJWkDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1KGRwj3J6iR3JPl6kv1JXpfknCR7kzzS7s9uY5Pk5iRTSR5McsnivgRJ0rFGPXP/MPAfVfVq4DXAfmA7sK+qNgL72jbAFcDGdpsAblnQGUuSZjVruCd5BfDbwE6AqvpxVT0NbAF2tWG7gKtaewtwaw3cA6xOcv6Cz1ySdEKjnLlfAEwD/5Tk/iQfSfJyYE1VHW5jHgfWtPZa4MDQ/gdb7QWSTCSZTDI5PT196q9AknScUcJ9FXAJcEtVvRb4X55fggGgqgqouTxxVe2oqvGqGh8bG5vLrpKkWYwS7geBg1V1b9u+g0HYP3F0uaXdH2n9h4D1Q/uvazVJ0hKZNdyr6nHgQJJXtdJm4GFgD7C11bYCu1t7D3Btu2pmE/DM0PKNJGkJrBpx3J8CH0tyJvAocB2DXwy3J9kGPAZc3cbeCVwJTAHPtrGSpCU0UrhX1QPA+Axdm2cYW8D185yXJGkefIeqJHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOjRTuSb6V5KtJHkgy2WrnJNmb5JF2f3arJ8nNSaaSPJjkksV8AZKk483lzP13quriqjr6XarbgX1VtRHY17YBrgA2ttsEcMtCTVaSNJr5LMtsAXa19i7gqqH6rTVwD7A6yfnzeB5J0hyNGu4FfC7JfUkmWm1NVR1u7ceBNa29FjgwtO/BVnuBJBNJJpNMTk9Pn8LUJUknsmrEcW+oqkNJfhnYm+Trw51VVUlqLk9cVTuAHQDj4+Nz2leSdHIjnblX1aF2fwT4NHAp8MTR5ZZ2f6QNPwSsH9p9XatJkpbIrOGe5OVJfvFoG/g94GvAHmBrG7YV2N3ae4Br21Uzm4BnhpZvJElLYJRlmTXAp5McHf+vVfUfSb4E3J5kG/AYcHUbfydwJTAFPAtct+CzliSd1KzhXlWPAq+Zof4ksHmGegHXL8jsJEmnxHeoSlKHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nq0MjhnuSMJPcn+UzbviDJvUmmknwiyZmt/tK2PdX6NyzO1CVJJzKXM/d3AvuHtm8EbqqqC4GngG2tvg14qtVvauMkSUtopHBPsg54M/CRth3gMuCONmQXcFVrb2nbtP7NbbwkaYmMeub+IeAvgZ+17XOBp6vqubZ9EFjb2muBAwCt/5k2XpK0RGYN9yRvAY5U1X0L+cRJJpJMJpmcnp5eyIeWpBVvlDP31wNvTfIt4OMMlmM+DKxOsqqNWQccau1DwHqA1v8K4MljH7SqdlTVeFWNj42NzetFSJJeaNZwr6q/qqp1VbUBuAa4q6reAdwNvK0N2wrsbu09bZvWf1dV1YLOWpJ0UvO5zv29wLuTTDFYU9/Z6juBc1v93cD2+U1RkjRXq2Yf8ryq+jzw+dZ+FLh0hjE/BH5/AeYmSTpFvkNVkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6tCs4Z7krCRfTPKVJA8l+UCrX5Dk3iRTST6R5MxWf2nbnmr9Gxb3JUiSjjXKmfuPgMuq6jXAxcDlSTYBNwI3VdWFwFPAtjZ+G/BUq9/UxkmSltCs4V4DP2ibL2m3Ai4D7mj1XcBVrb2lbdP6NyfJgs1YkjSrkdbck5yR5AHgCLAX+CbwdFU914YcBNa29lrgAEDrfwY4d4bHnEgymWRyenp6fq9CkvQCI4V7Vf20qi4G1gGXAq+e7xNX1Y6qGq+q8bGxsfk+nCRpyJyulqmqp4G7gdcBq5Osal3rgEOtfQhYD9D6XwE8uSCzlSSNZJSrZcaSrG7tlwFvAvYzCPm3tWFbgd2tvadt0/rvqqpayElLkk5u1exDOB/YleQMBr8Mbq+qzyR5GPh4kr8F7gd2tvE7gX9JMgV8F7hmEeYtSTqJWcO9qh4EXjtD/VEG6+/H1n8I/P6CzE6SdEp8h6okdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA6N8gXZ65PcneThJA8leWern5Nkb5JH2v3ZrZ4kNyeZSvJgkksW+0VIkl5olDP354D3VNVFwCbg+iQXAduBfVW1EdjXtgGuADa22wRwy4LPWpJ0UrOGe1Udrqovt/b3gf3AWmALsKsN2wVc1dpbgFtr4B5gdZLzF3zmkqQTmtOae5INwGuBe4E1VXW4dT0OrGnttcCBod0OttqxjzWRZDLJ5PT09BynLUk6mZHDPckvAJ8E3lVV3xvuq6oCai5PXFU7qmq8qsbHxsbmsqskaRYjhXuSlzAI9o9V1ada+Ymjyy3t/kirHwLWD+2+rtUkSUtklKtlAuwE9lfVB4e69gBbW3srsHuofm27amYT8MzQ8o0kaQmsGmHM64E/Ar6a5IFWex9wA3B7km3AY8DVre9O4EpgCngWuG5BZyxJmtWs4V5V/w3kBN2bZxhfwPXznJckaR58h6okdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA6N8gXZH01yJMnXhmrnJNmb5JF2f3arJ8nNSaaSPJjkksWcvCRpZqOcuf8zcPkxte3AvqraCOxr2wBXABvbbQK4ZWGmKUmai1nDvaq+AHz3mPIWYFdr7wKuGqrfWgP3AKuTnL9Qk5UkjeZU19zXVNXh1n4cWNPaa4EDQ+MOttpxkkwkmUwyOT09fYrTkCTNZN5/UK2qAuoU9ttRVeNVNT42NjbfaUiShpxquD9xdLml3R9p9UPA+qFx61pNkrSETjXc9wBbW3srsHuofm27amYT8MzQ8o0kaYmsmm1AktuANwLnJTkIvB+4Abg9yTbgMeDqNvxO4EpgCngWuG4R5ixJmsWs4V5Vbz9B1+YZxhZw/XwntRA2bP/sjPVv3fDmJZ6JJC0936EqSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOjTrpZC9OdElkuBlkpL64Zm7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6tCKuxTyZPwkSUm98MxdkjpkuEtSh1yWGYHLNZJON565S1KHFuXMPcnlwIeBM4CPVNUNi/E8y+1kn1MzE8/0JS2VBQ/3JGcA/wi8CTgIfCnJnqp6eKGf63Tj8o6kpbIYZ+6XAlNV9ShAko8DW4AVH+4nMtf/ASynuf4i8heatDwWI9zXAgeGtg8Cv3XsoCQTwETb/EGSb5zi850HfOcU9+3Voh2T3Pjiepw58N/J8TwmMzudjsuvnqhj2a6WqaodwI75Pk6SyaoaX4ApdcNjcjyPyfE8JjPr5bgsxtUyh4D1Q9vrWk2StEQWI9y/BGxMckGSM4FrgD2L8DySpBNY8GWZqnouyZ8A/8ngUsiPVtVDC/08Q+a9tNMhj8nxPCbH85jMrIvjkqpa7jlIkhaY71CVpA4Z7pLUodM63JNcnuQbSaaSbF/u+SyHJB9NciTJ14Zq5yTZm+SRdn/2cs5xqSVZn+TuJA8neSjJO1t9xR6XJGcl+WKSr7Rj8oFWvyDJve1n6BPtIogVJckZSe5P8pm23cUxOW3DfehjDq4ALgLenuSi5Z3Vsvhn4PJjatuBfVW1EdjXtleS54D3VNVFwCbg+vZvYyUflx8Bl1XVa4CLgcuTbAJuBG6qqguBp4BtyzjH5fJOYP/QdhfH5LQNd4Y+5qCqfgwc/ZiDFaWqvgB895jyFmBXa+8CrlrSSS2zqjpcVV9u7e8z+MFdywo+LjXwg7b5knYr4DLgjlZfUccEIMk64M3AR9p26OSYnM7hPtPHHKxdprm82KypqsOt/TiwZjkns5ySbABeC9zLCj8ubfnhAeAIsBf4JvB0VT3XhqzEn6EPAX8J/Kxtn0snx+R0DneNoAbXuq7I612T/ALwSeBdVfW94b6VeFyq6qdVdTGDd41fCrx6mae0rJK8BThSVfct91wWw+n8TUx+zMGJPZHk/Ko6nOR8BmdqK0qSlzAI9o9V1adaecUfF4CqejrJ3cDrgNVJVrUz1ZX2M/R64K1JrgTOAn6JwfdQdHFMTuczdz/m4MT2AFtbeyuwexnnsuTauulOYH9VfXCoa8UelyRjSVa39ssYfN/CfuBu4G1t2Io6JlX1V1W1rqo2MMiPu6rqHXRyTE7rd6i237gf4vmPOfi7ZZ7SkktyG/BGBh9T+gTwfuDfgduBVwKPAVdX1bF/dO1WkjcA/wV8lefXUt/HYN19RR6XJL/B4I+DZzA4qbu9qv4mya8xuBjhHOB+4A+r6kfLN9PlkeSNwF9U1Vt6OSandbhLkmZ2Oi/LSJJOwHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHfp/paL2i7eBPXAAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "import numpy as np\n", "from matplotlib import pyplot as plt\n", "\n", "\n", "T = (np.random.exponential(size=1000)/1.5) ** 2.3\n", "\n", "plt.hist(T, bins=50);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's use the Weibull parametric model: https://en.wikipedia.org/wiki/Weibull_distribution" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def pdf(params, t):\n", " lambda_, rho_ = params\n", " # I'm not going to make you type this out - it's annoying and error prone. \n", " return rho_ / lambda_ * (t / lambda_) ** (rho_ - 1) * np.exp(-(t/lambda_) ** rho_)\n", "\n", "# okay, but we actually need the _log_ of the pdf\n", "def log_pdf(params, t):\n", " lambda_, rho_ = params\n", " # I'm not going to make you type this out - it's annoying and error prone. \n", " return np.log(rho_) - np.log(lambda_) + (rho_ - 1) * (np.log(t) - np.log(lambda_)) - (t/lambda_) ** rho_\n", "\n", "# now we can define the log likehood\n", "\n", "def log_likelihood(params, t):\n", " return np.sum(log_pdf(params, t)) \n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## `scipy.optimize.minimize`" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "slideshow": { "slide_type": "-" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " fun: nan\n", " hess_inv: array([[1, 0],\n", " [0, 1]])\n", " jac: array([nan, nan])\n", " message: 'Desired error not necessarily achieved due to precision loss.'\n", " nfev: 448\n", " nit: 1\n", " njev: 112\n", " status: 2\n", " success: False\n", " x: array([ -29.10248229, 1034.80182732])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in power\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in power\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in power\n", " # Remove the CWD from sys.path while we load stuff.\n" ] } ], "source": [ "from scipy.optimize import minimize\n", "\n", "results = minimize(log_likelihood, \n", " x0 = np.array([1.0, 1.0]), # some initial guess of the parameters. \n", " method=None, \n", " args=(T, ))\n", "\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def log_likelihood(params, t):\n", " return -np.sum(log_pdf(params, t)) \n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " fun: nan\n", " hess_inv: array([[1, 0],\n", " [0, 1]])\n", " jac: array([nan, nan])\n", " message: 'Desired error not necessarily achieved due to precision loss.'\n", " nfev: 448\n", " nit: 1\n", " njev: 112\n", " status: 2\n", " success: False\n", " x: array([ 31.10248229, -1032.80182732])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: overflow encountered in power\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: overflow encountered in power\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in log\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: overflow encountered in power\n", " # Remove the CWD from sys.path while we load stuff.\n" ] } ], "source": [ "results = minimize(log_likelihood, \n", " x0 = np.array([1.0, 1.0]), # some initial guess of the parameters. \n", " method=None, \n", " args=(T, ))\n", "\n", "print(results)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " fun: nan\n", " hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>\n", " jac: array([ 59.76914963, 2616.4018891 ])\n", " message: b'ABNORMAL_TERMINATION_IN_LNSRCH'\n", " nfev: 129\n", " nit: 1\n", " status: 2\n", " success: False\n", " x: array([1.16054112, 0.99805959])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: divide by zero encountered in log\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in add\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: divide by zero encountered in true_divide\n", " # Remove the CWD from sys.path while we load stuff.\n", "/Users/camerondavidson-pilon/venvs/data/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in double_scalars\n", " # Remove the CWD from sys.path while we load stuff.\n" ] } ], "source": [ "# weibull parameters must be greater than 0!\n", "# we can \"nudge\" the minimizer to understand this using the bounds argument\n", "results = minimize(log_likelihood, \n", " x0 = np.array([1.0, 1.0]),\n", " method=None, \n", " args=(T, ),\n", " bounds=((0, None), (0, None)))\n", "\n", "print(results)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Takeaway: `minimize` is a very flexible function for small-mid size parameter optimization" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### A few problems though:\n", "\n", "1. You are stuck with only using known parametric models that are easy to implement. \n", "2. Not very fast minimization routines\n", "\n", "Let's move to Part 2. " ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }