{
"metadata": {
"name": "",
"signature": "sha256:d80460bec2760ecad9036f56444cde414faa492b95535884974173263cb1bd0f"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Change Detection Tutorial\n",
"\n",
" author: @amanqa\n",
" source: github.com/amanahuja/change-detection-tutorial\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Modified Stylesheet for notebook.\n",
"from IPython.core.display import HTML\n",
"def css_styling():\n",
" styles = open(\"custom.css\", \"r\").read()\n",
" return HTML(styles)\n",
"\n",
"css_styling()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 1,
"text": [
""
]
}
],
"prompt_number": 1
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##Section 02: Noise and Windows\n",
"\n",
"Lesson Plan\n",
"\n",
" * Streaming statistics\n",
" * Welford's Method\n",
" * \n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import matplotlib.pyplot as plt\n",
"import numpy as np"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import sys; sys.path.append('../src/')\n",
"from change_detector import ChangeDetector, OnlineSimulator"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 3
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Aside: Computing statistics in a stream (running variance)"
]
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"Welford's Method\n",
"to compute running mean and running variance/std\n",
"\n",
"Welford 1962, see also Knuth, AOCP Vol. 2 page 232\n",
"http://www.johndcook.com/standard_deviation.html\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def welford(vals):\n",
" #via https://gist.github.com/alexalemi/2151722\n",
" \n",
" m = vals[0]\n",
" s = 0\n",
" for k in range(1, len(vals)):\n",
" oldm = m\n",
" x = vals[k]\n",
" newm = oldm + (x - oldm) / (k + 1)\n",
" s = s + (x - newm) * (x - oldm)\n",
" m = newm\n",
" return s / (k+1)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"\n",
"#Testing\n",
"seq = np.random.randn(100)\n",
"print welford(seq)\n",
"print np.var(seq)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"0.930460788997\n",
"0.930460788997\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class Welford_Detector(ChangeDetector):\n",
" \"\"\"\n",
" Testing Welford's algorithm\n",
" implemented like our other change detector\n",
" \"\"\"\n",
" \n",
" def __init__(self): \n",
" super(Welford_Detector, self).__init__()\n",
" #Interim and calculated values\n",
" self.k = 0 #k is the signal_size\n",
"\n",
" #For Welford's \n",
" self.mean_ = 0.0 \n",
" self.s = 0.0 # var = s / (k + 1)\n",
" \n",
" def update_residuals(self, new_signal_value): \n",
" #Update residuals \n",
" \n",
" #Welford's. \n",
" oldm = self.mean_\n",
" x = new_signal_value\n",
" newm = oldm + (x - oldm) / (self.k + 1)\n",
" self.s = self.s + (x - newm) * (x - oldm)\n",
" self.mean_ = newm\n",
" \n",
" self.var_ = self.s / (self.k+1)\n",
" \n",
" self.k += 1\n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Initialize detector\n",
"welford = Welford_Detector()\n",
"\n",
"# Create a random signal\n",
"signal = np.random.randint(0,100,size=300)\n",
"\n",
"# Run simulation and fetch residual history\n",
"sim = OnlineSimulator(welford, signal)\n",
"sim.run(plot=False)\n",
"\n",
"residuals = sim.residuals_history\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Comparisons of Welford's Method with numpy builtin stats\n",
"\n",
"print \"{:<40} : {:>}\".format(\n",
" \"Variance via Welford's algorithm\", \n",
" residuals['var_'][-1] #(the last value is the variance after the entire signal has been processed.)\n",
" )\n",
"\n",
"print \"{:<40} : {:>}\".format(\n",
" \"Variance via Numpy\", \n",
" np.var(signal),\n",
" )\n",
"\n",
"print \"{:<40} : {:>}\".format(\n",
" \"Mean via Welford's algorithm\", \n",
" residuals['mean_'][-1] #(the last value is the variance after the entire signal has been processed.)\n",
" )\n",
"\n",
"print \"{:<40} : {:>}\".format(\n",
" \"Mean via Numpy\", \n",
" np.mean(signal),\n",
" )"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Variance via Welford's algorithm : 810.117455556\n",
"Variance via Numpy : 810.117455556\n",
"Mean via Welford's algorithm : 52.4233333333\n",
"Mean via Numpy : 52.4233333333\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print welford.__dict__"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{'mean_': 52.423333333333325, 'rules_triggered': False, 'k': 300, 'signal_size': 0, 'has_started': True, 's': 243035.23666666658, 'var_': 810.11745555555524}\n"
]
}
],
"prompt_number": 15
},
{
"cell_type": "raw",
"metadata": {},
"source": [
"In this way we can collect decent statistics on a stream of data without having to store the entire history. In this way we can collect decent statistics on a stream of data without having to store the entire history. "
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Return to the Change Detection Tutorial [Table of Contents](https://github.com/amanahuja/change-detection-tutorial)"
]
}
],
"metadata": {}
}
]
}