{
 "metadata": {
  "name": "",
  "signature": "sha256:d80460bec2760ecad9036f56444cde414faa492b95535884974173263cb1bd0f"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Change Detection Tutorial\n",
      "\n",
      "    author: @amanqa\n",
      "    source: github.com/amanahuja/change-detection-tutorial\n"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Modified Stylesheet for notebook.\n",
      "from IPython.core.display import HTML\n",
      "def css_styling():\n",
      "    styles = open(\"custom.css\", \"r\").read()\n",
      "    return HTML(styles)\n",
      "\n",
      "css_styling()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<style>    \n",
        "\n",
        ".input_prompt {\n",
        "    background-color:#dd8855;\n",
        "    -webkit-border-radius: 10px;\n",
        "    -moz-border-radius: 10px;\n",
        "    border-radius: 10px;\n",
        "}\n",
        "    .cell:first-of-type>.input_prompt\n",
        "        {\n",
        "        -webkit-border-radius: 5px 30px 5px 30px;\n",
        "        -moz-border-radius: 5px 30px 5px 30px;\n",
        "        border-radius: 5px 30px 5px 30px;\n",
        "    }\n",
        "\n",
        ".input > .input_prompt{\n",
        "    background-color: #FFF;\n",
        "    }\n",
        "div.cell{        \n",
        "    max-width:750px;        \n",
        "    margin-left:auto;       \n",
        "    margin-right:auto;    \n",
        "    }    \n",
        "\n",
        "h1 {\n",
        "    font-size: 120%;\n",
        "    color: #dd8855;\n",
        "    text-align:center;        \n",
        "    font-familly:\"Charis SIL\", serif;    \n",
        "    }\n",
        "\n",
        "h2 {\n",
        "    font-size: 110%;\n",
        "    color: #dd8855;\n",
        "    }\n",
        "\n",
        "</style>\n"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 1,
       "text": [
        "<IPython.core.display.HTML at 0x28df210>"
       ]
      }
     ],
     "prompt_number": 1
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "##Section 02: Noise and Windows\n",
      "\n",
      "Lesson Plan\n",
      "\n",
      " * Streaming statistics\n",
      " * Welford's Method\n",
      " * \n"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import matplotlib.pyplot as plt\n",
      "import numpy as np"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import sys; sys.path.append('../src/')\n",
      "from change_detector import ChangeDetector, OnlineSimulator"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "## Aside: Computing statistics in a stream (running variance)"
     ]
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "Welford's Method\n",
      "to compute running mean and running variance/std\n",
      "\n",
      "Welford 1962, see also Knuth, AOCP Vol. 2 page 232\n",
      "http://www.johndcook.com/standard_deviation.html\n"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def welford(vals):\n",
      "    #via https://gist.github.com/alexalemi/2151722\n",
      "    \n",
      "    m = vals[0]\n",
      "    s = 0\n",
      "    for k in range(1, len(vals)):\n",
      "        oldm = m\n",
      "        x = vals[k]\n",
      "        newm = oldm + (x - oldm) / (k + 1)\n",
      "        s = s + (x - newm) * (x - oldm)\n",
      "        m = newm\n",
      "    return s / (k+1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "\n",
      "#Testing\n",
      "seq = np.random.randn(100)\n",
      "print welford(seq)\n",
      "print np.var(seq)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "0.930460788997\n",
        "0.930460788997\n"
       ]
      }
     ],
     "prompt_number": 5
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "class Welford_Detector(ChangeDetector):\n",
      "    \"\"\"\n",
      "    Testing Welford's algorithm\n",
      "    implemented like our other change detector\n",
      "    \"\"\"\n",
      "    \n",
      "    def __init__(self): \n",
      "        super(Welford_Detector, self).__init__()\n",
      "        #Interim and calculated values\n",
      "        self.k = 0    #k is the signal_size\n",
      "\n",
      "        #For Welford's \n",
      "        self.mean_ = 0.0     \n",
      "        self.s = 0.0      # var = s / (k + 1)\n",
      "                \n",
      "    def update_residuals(self, new_signal_value): \n",
      "        #Update residuals \n",
      "        \n",
      "        #Welford's. \n",
      "        oldm = self.mean_\n",
      "        x = new_signal_value\n",
      "        newm = oldm + (x - oldm) / (self.k + 1)\n",
      "        self.s = self.s + (x - newm) * (x - oldm)\n",
      "        self.mean_ = newm\n",
      "        \n",
      "        self.var_ = self.s / (self.k+1)\n",
      "        \n",
      "        self.k += 1\n",
      "        "
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 6
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Initialize detector\n",
      "welford = Welford_Detector()\n",
      "\n",
      "# Create a random signal\n",
      "signal = np.random.randint(0,100,size=300)\n",
      "\n",
      "# Run simulation and fetch residual history\n",
      "sim = OnlineSimulator(welford, signal)\n",
      "sim.run(plot=False)\n",
      "\n",
      "residuals = sim.residuals_history\n"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 11
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Comparisons of Welford's Method with numpy builtin stats\n",
      "\n",
      "print \"{:<40} : {:>}\".format(\n",
      "        \"Variance via Welford's algorithm\", \n",
      "        residuals['var_'][-1]  #(the last value is the variance after the entire signal has been processed.)\n",
      "        )\n",
      "\n",
      "print \"{:<40} : {:>}\".format(\n",
      "        \"Variance via Numpy\", \n",
      "        np.var(signal),\n",
      "        )\n",
      "\n",
      "print \"{:<40} : {:>}\".format(\n",
      "        \"Mean via Welford's algorithm\", \n",
      "        residuals['mean_'][-1]  #(the last value is the variance after the entire signal has been processed.)\n",
      "        )\n",
      "\n",
      "print \"{:<40} : {:>}\".format(\n",
      "        \"Mean via Numpy\", \n",
      "        np.mean(signal),\n",
      "        )"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "Variance via Welford's algorithm         : 810.117455556\n",
        "Variance via Numpy                       : 810.117455556\n",
        "Mean via Welford's algorithm             : 52.4233333333\n",
        "Mean via Numpy                           : 52.4233333333\n"
       ]
      }
     ],
     "prompt_number": 13
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print welford.__dict__"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "{'mean_': 52.423333333333325, 'rules_triggered': False, 'k': 300, 'signal_size': 0, 'has_started': True, 's': 243035.23666666658, 'var_': 810.11745555555524}\n"
       ]
      }
     ],
     "prompt_number": 15
    },
    {
     "cell_type": "raw",
     "metadata": {},
     "source": [
      "In this way we can collect decent statistics on a stream of data without having to store the entire history. In this way we can collect decent statistics on a stream of data without having to store the entire history. "
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Return to the Change Detection Tutorial [Table of Contents](https://github.com/amanahuja/change-detection-tutorial)"
     ]
    }
   ],
   "metadata": {}
  }
 ]
}