{
 "metadata": {
  "name": "kernel_density"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 3,
     "metadata": {},
     "source": [
      "Kernel Density Estimation"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "from scipy import stats\n",
      "import statsmodels.api as sm\n",
      "import matplotlib.pyplot as plt\n",
      "from statsmodels.distributions.mixture_rvs import mixture_rvs"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "A univariate example."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Seed NumPy's global generator so the sampled mixtures below are reproducible.\n",
      "np.random.seed(12345)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# 10,000 draws from a two-component normal mixture:\n",
      "# weight 0.25 on N(-1, 0.5) and weight 0.75 on N(1, 0.5).\n",
      "obs_dist1 = mixture_rvs([.25, .75], size=10000, dist=[stats.norm, stats.norm],\n",
      "                        kwargs=(dict(loc=-1, scale=.5), dict(loc=1, scale=.5)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Fit a univariate kernel density estimate with the default settings.\n",
      "kde = sm.nonparametric.KDEUnivariate(obs_dist1)\n",
      "kde.fit()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Overlay the fitted density on a normalised histogram of the sample.\n",
      "# `density=True` replaces the `normed` keyword, which matplotlib has removed.\n",
      "fig, ax = plt.subplots(figsize=(12, 8))\n",
      "ax.hist(obs_dist1, bins=50, density=True, color='red')\n",
      "ax.plot(kde.support, kde.density, lw=2, color='black');"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Second sample: a normal component mixed with a beta component.\n",
      "# `args=(1, .5)` carries the positional shape parameters for stats.beta.\n",
      "obs_dist2 = mixture_rvs([.25, .75], size=10000, dist=[stats.norm, stats.beta],\n",
      "                        kwargs=(dict(loc=-1, scale=.5),\n",
      "                                dict(loc=1, scale=1, args=(1, .5))))\n",
      "\n",
      "kde2 = sm.nonparametric.KDEUnivariate(obs_dist2)\n",
      "kde2.fit()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Same comparison as above for the normal/beta mixture.\n",
      "fig, ax = plt.subplots(figsize=(12, 8))\n",
      "ax.hist(obs_dist2, bins=50, density=True, color='red')\n",
      "ax.plot(kde2.support, kde2.density, lw=2, color='black');"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The fitted KDE object is a full non-parametric distribution."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# A smaller sample (1,000 draws) from the same normal mixture as obs_dist1.\n",
      "obs_dist3 = mixture_rvs([.25, .75], size=1000, dist=[stats.norm, stats.norm],\n",
      "                        kwargs=(dict(loc=-1, scale=.5), dict(loc=1, scale=.5)))\n",
      "kde3 = sm.nonparametric.KDEUnivariate(obs_dist3)\n",
      "kde3.fit()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Entropy of the fitted density.\n",
      "kde3.entropy"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Evaluate the fitted density at x = -1, the location of the first component.\n",
      "kde3.evaluate(-1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "CDF"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "fig, ax = plt.subplots(figsize=(12, 8))\n",
      "ax.plot(kde3.support, kde3.cdf);"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Cumulative Hazard Function"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "fig, ax = plt.subplots(figsize=(12, 8))\n",
      "ax.plot(kde3.support, kde3.cumhazard);"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Inverse CDF"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# The inverse CDF maps probabilities to data values, so its x-axis is a\n",
      "# probability grid on [0, 1], not the support of the density. statsmodels\n",
      "# evaluates `icdf` at evenly spaced probabilities, one per grid point.\n",
      "probabilities = np.linspace(0, 1, len(kde3.icdf))\n",
      "fig, ax = plt.subplots(figsize=(12, 8))\n",
      "ax.plot(probabilities, kde3.icdf);"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 4,
     "metadata": {},
     "source": [
      "Survival Function"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "fig, ax = plt.subplots(figsize=(12, 8))\n",
      "ax.plot(kde3.support, kde3.sf);"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}