{ "cells": [ { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Encoders\n", "\n", "* Scalar\n", "* Date/time\n", "* Category\n", "* Multi" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import numpy" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from nupic.encoders import ScalarEncoder\n", "\n", "ScalarEncoder?" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3 = [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n", "4 = [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n", "5 = [0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n" ] } ], "source": [ "# 22 bits with 3 active representing values 0 to 100\n", "# clipInput=True makes values >100 encode the same as 100 (instead of throwing a ValueError)\n", "# forced=True allows small values for `n` and `w`\n", "enc = ScalarEncoder(n=22, w=3, minval=2.5, maxval=97.5, clipInput=True, forced=True)\n", "print \"3 =\", enc.encode(3)\n", "print \"4 =\", enc.encode(4)\n", "print \"5 =\", enc.encode(5)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "100 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]\n", "1000 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]\n" ] } ], "source": [ "# Encode maxval\n", "print \"100 =\", enc.encode(100)\n", "# See that any larger number gets the same encoding\n", "print \"1000 =\", enc.encode(1000)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from 
nupic.encoders.random_distributed_scalar import RandomDistributedScalarEncoder\n", "\n", "RandomDistributedScalarEncoder?" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1]\n", "4 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1]\n", "5 = [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1]\n", "\n", "100 = [0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n", "1000 = [0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0]\n" ] } ], "source": [ "# 21 bits with 3 active with buckets of size 5\n", "rdse = RandomDistributedScalarEncoder(n=21, w=3, resolution=5, offset=2.5)\n", "\n", "print \"3 = \", rdse.encode(3)\n", "print \"4 = \", rdse.encode(4)\n", "print \"5 = \", rdse.encode(5)\n", "print\n", "print \"100 = \", rdse.encode(100)\n", "print \"1000 =\", rdse.encode(1000)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "import datetime\n", "from nupic.encoders.date import DateEncoder\n", "\n", "DateEncoder?" 
] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "now = [0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0]\n", "next month = [0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0]\n", "xmas = [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]\n" ] } ], "source": [ "de = DateEncoder(season=5)\n", "\n", "now = datetime.datetime.strptime(\"2014-05-02 13:08:58\", \"%Y-%m-%d %H:%M:%S\")\n", "print \"now = \", de.encode(now)\n", "nextMonth = datetime.datetime.strptime(\"2014-06-02 13:08:58\", \"%Y-%m-%d %H:%M:%S\")\n", "print \"next month =\", de.encode(nextMonth)\n", "xmas = datetime.datetime.strptime(\"2014-12-25 13:08:58\", \"%Y-%m-%d %H:%M:%S\")\n", "print \"xmas = \", de.encode(xmas)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cat = [0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]\n", "dog = [0 0 0 0 0 0 1 1 1 0 0 0 0 0 0]\n", "monkey = [0 0 0 0 0 0 0 0 0 1 1 1 0 0 0]\n", "slow loris = [0 0 0 0 0 0 0 0 0 0 0 0 1 1 1]\n" ] } ], "source": [ "from nupic.encoders.category import CategoryEncoder\n", "\n", "categories = (\"cat\", \"dog\", \"monkey\", \"slow loris\")\n", "encoder = CategoryEncoder(w=3, categoryList=categories, forced=True)\n", "cat = encoder.encode(\"cat\")\n", "dog = encoder.encode(\"dog\")\n", "monkey = encoder.encode(\"monkey\")\n", "loris = encoder.encode(\"slow loris\")\n", "print \"cat = \", cat\n", "print \"dog = \", dog\n", "print \"monkey = \", monkey\n", "print \"slow loris =\", loris" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n" ] } ], "source": [ "print encoder.encode(None)" ] }, { "cell_type": "code", "execution_count": 11, 
"metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1 1 1 0 0 0 0 0 0 0 0 0 0 0 0]\n" ] } ], "source": [ "print encoder.encode(\"unknown\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "({'category': ([(1, 1)], 'cat')}, ['category'])\n" ] } ], "source": [ "print encoder.decode(cat)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "({'category': ([(1, 2)], 'cat, dog')}, ['category'])\n" ] } ], "source": [ "catdog = numpy.array([0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0])\n", "print encoder.decode(catdog)" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Spatial Pooler" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from nupic.research.spatial_pooler import SpatialPooler\n", "\n", "print SpatialPooler?" 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "print SpatialPooler" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print SpatialPooler" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print SpatialPooler" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print SpatialPooler" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print SpatialPooler" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print SpatialPooler" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print SpatialPooler" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "print SpatialPooler" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" 
] } ], "source": [ "print SpatialPooler" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "15\n", "[0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]\n" ] } ], "source": [ "print len(cat)\n", "print cat" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 0 1 1 1 1 0 0 0 0 1 1 1 1 0]\n", "[1 0 0 0 1 1 1 1 0 1 0 0 0 1 1]\n", "[1 1 0 0 0 0 0 1 1 1 1 1 1 0 0]\n", "[1 1 0 1 1 0 0 1 1 0 1 0 0 1 1]\n" ] } ], "source": [ "sp = SpatialPooler(inputDimensions=(15,),\n", " columnDimensions=(4,),\n", " potentialRadius=15,\n", " numActiveColumnsPerInhArea=1,\n", " globalInhibition=True,\n", " synPermActiveInc=0.03,\n", " potentialPct=1.0)\n", "import numpy\n", "for column in xrange(4):\n", " connected = numpy.zeros((15,), dtype=\"int\")\n", " sp.getConnectedSynapses(column, connected)\n", " print connected" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1 0 0 0]\n" ] } ], "source": [ "output = numpy.zeros((4,), dtype=\"int\")\n", "sp.compute(cat, learn=True, activeArray=output)\n", "print output" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "for _ in xrange(20):\n", " sp.compute(cat, learn=True, activeArray=output)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 0 1 1 1 1 0 0 0 0 1 1 1 1 0]\n", "[1 0 0 0 1 1 1 1 0 1 0 0 0 1 1]\n", "[1 1 0 0 0 0 0 1 1 1 1 1 1 0 0]\n", "[1 1 0 1 1 0 0 1 1 0 1 0 0 1 1]\n" ] } ], "source": [ 
"for column in xrange(4):\n", " connected = numpy.zeros((15,), dtype=\"int\")\n", " sp.getConnectedSynapses(column, connected)\n", " print connected" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "for _ in xrange(200):\n", " sp.compute(cat, learn=True, activeArray=output)\n", " sp.compute(dog, learn=True, activeArray=output)\n", " sp.compute(monkey, learn=True, activeArray=output)\n", " sp.compute(loris, learn=True, activeArray=output)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 0 0 1 1 1 0 0 0 0 0 0 0 0 0]\n", "[1 0 0 0 1 1 1 1 0 1 0 0 0 1 1]\n", "[0 0 0 0 0 0 0 0 0 1 1 1 0 0 0]\n", "[0 0 0 0 0 0 1 1 1 0 0 0 1 1 1]\n" ] } ], "source": [ "for column in xrange(4):\n", " connected = numpy.zeros((15,), dtype=\"int\")\n", " sp.getConnectedSynapses(column, connected)\n", " print connected" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 0 0 1 1 0 1 0 0 0 0 0 0 0 0]\n" ] } ], "source": [ "noisyCat = numpy.zeros((15,), dtype=\"uint32\")\n", "noisyCat[3] = 1\n", "noisyCat[4] = 1\n", "# This is part of dog!\n", "noisyCat[6] = 1\n", "print noisyCat" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 1 0 0]\n" ] } ], "source": [ "sp.compute(noisyCat, learn=False, activeArray=output)\n", "print output # matches cat!" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Temporal Memory (a.k.a. 
Sequence Memory, Temporal Pooler)\n", "\n", "From: `examples/tm/hello_tm.py`" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from nupic.research.BacktrackingTM import BacktrackingTM\n", "\n", "BacktrackingTM?" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "# Step 1: create Temporal Pooler instance with appropriate parameters\n", "tm = BacktrackingTM(numberOfCols=50, cellsPerColumn=2,\n", " initialPerm=0.5, connectedPerm=0.5,\n", " minThreshold=10, newSynapseCount=10,\n", " permanenceInc=0.1, permanenceDec=0.0,\n", " activationThreshold=8,\n", " globalDecay=0, burnIn=1,\n", " checkSynapseConsistency=False,\n", " pamLength=10)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "# Step 2: create input vectors to feed to the temporal memory. Each input vector\n", "# must be numberOfCols wide. 
Here we create a simple sequence of 5 vectors\n", "# representing the sequence A -> B -> C -> D -> E\n", "x = numpy.zeros((5, tm.numberOfCols), dtype=\"uint32\")\n", "x[0,0:10] = 1 # Input SDR representing \"A\", corresponding to columns 0-9\n", "x[1,10:20] = 1 # Input SDR representing \"B\", corresponding to columns 10-19\n", "x[2,20:30] = 1 # Input SDR representing \"C\", corresponding to columns 20-29\n", "x[3,30:40] = 1 # Input SDR representing \"D\", corresponding to columns 30-39\n", "x[4,40:50] = 1 # Input SDR representing \"E\", corresponding to columns 40-49" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "# Step 3: send this simple sequence to the temporal memory for learning\n", "# We repeat the sequence 10 times\n", "for i in range(10):\n", "\n", " # Send each letter in the sequence in order\n", " for j in range(5):\n", "\n", " # The compute method performs one step of learning and/or inference. Note:\n", " # here we just perform learning but you can perform prediction/inference and\n", " # learning in the same step if you want (online learning).\n", " tm.compute(x[j], enableLearn = True, enableInference = False)\n", "\n", " # This function prints the segments associated with every cell.\n", " # If you really want to understand the TM, uncomment this line. By following\n", " # every step you can get an excellent understanding for exactly how the TM\n", " # learns.\n", " #tm.printCells()\n", "\n", " # The reset command tells the TM that a sequence just ended and essentially\n", " # zeros out all the states. 
It is not strictly necessary but it's a bit\n", " # messier without resets, and the TM learns quicker with resets.\n", " tm.reset()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "-------- A -----------\n", "Raw input vector\n", "1111111111 0000000000 0000000000 0000000000 0000000000 \n", "\n", "All the active and predicted cells:\n", "\n", "Inference Active state\n", "1111111111 0000000000 0000000000 0000000000 0000000000 \n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "Inference Predicted state\n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "0000000000 1111111111 0000000000 0000000000 0000000000 \n", "\n", "\n", "The following columns are predicted by the temporal memory. This\n", "should correspond to columns in the *next* item in the sequence.\n", "[10 11 12 13 14 15 16 17 18 19] \n", "\n", "\n", "-------- B -----------\n", "Raw input vector\n", "0000000000 1111111111 0000000000 0000000000 0000000000 \n", "\n", "All the active and predicted cells:\n", "\n", "Inference Active state\n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "0000000000 1111111111 0000000000 0000000000 0000000000 \n", "Inference Predicted state\n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "0000000000 0000000000 1111111111 0000000000 0000000000 \n", "\n", "\n", "The following columns are predicted by the temporal memory. 
This\n", "should correspond to columns in the *next* item in the sequence.\n", "[20 21 22 23 24 25 26 27 28 29] \n", "\n", "\n", "-------- C -----------\n", "Raw input vector\n", "0000000000 0000000000 1111111111 0000000000 0000000000 \n", "\n", "All the active and predicted cells:\n", "\n", "Inference Active state\n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "0000000000 0000000000 1111111111 0000000000 0000000000 \n", "Inference Predicted state\n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "0000000000 0000000000 0000000000 1111111111 0000000000 \n", "\n", "\n", "The following columns are predicted by the temporal memory. This\n", "should correspond to columns in the *next* item in the sequence.\n", "[30 31 32 33 34 35 36 37 38 39] \n", "\n", "\n", "-------- D -----------\n", "Raw input vector\n", "0000000000 0000000000 0000000000 1111111111 0000000000 \n", "\n", "All the active and predicted cells:\n", "\n", "Inference Active state\n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "0000000000 0000000000 0000000000 1111111111 0000000000 \n", "Inference Predicted state\n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "0000000000 0000000000 0000000000 0000000000 1111111111 \n", "\n", "\n", "The following columns are predicted by the temporal memory. 
This\n", "should correspond to columns in the *next* item in the sequence.\n", "[40 41 42 43 44 45 46 47 48 49] \n", "\n", "\n", "-------- E -----------\n", "Raw input vector\n", "0000000000 0000000000 0000000000 0000000000 1111111111 \n", "\n", "All the active and predicted cells:\n", "\n", "Inference Active state\n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "0000000000 0000000000 0000000000 0000000000 1111111111 \n", "Inference Predicted state\n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "0000000000 0000000000 0000000000 0000000000 0000000000 \n", "\n", "\n", "The following columns are predicted by the temporal memory. This\n", "should correspond to columns in the *next* item in the sequence.\n", "[] \n" ] } ], "source": [ "# Step 4: send the same sequence of vectors and look at predictions made by\n", "# temporal memory\n", "\n", "# Utility routine for printing the input vector\n", "def formatRow(x):\n", " s = ''\n", " for c in range(len(x)):\n", " if c > 0 and c % 10 == 0:\n", " s += ' '\n", " s += str(x[c])\n", " s += ' '\n", " return s\n", "\n", "for j in range(5):\n", " print \"\\n\\n--------\",\"ABCDE\"[j],\"-----------\"\n", " print \"Raw input vector\\n\",formatRow(x[j])\n", "\n", " # Send each vector to the TP, with learning turned off\n", " tm.compute(x[j], enableLearn=False, enableInference=True)\n", "\n", " # This method prints out the active state of each cell followed by the\n", " # predicted state of each cell. For convenience the cells are grouped\n", " # 10 at a time. 
When there are multiple cells per column the printout\n", " # is arranged so the cells in a column are stacked together\n", " #\n", " # What you should notice is that the columns where active state is 1\n", " # represent the SDR for the current input pattern and the columns where\n", " # predicted state is 1 represent the SDR for the next expected pattern\n", " print \"\\nAll the active and predicted cells:\"\n", " tm.printStates(printPrevious=False, printLearnState=False)\n", "\n", " # tm.getPredictedState() gets the predicted cells.\n", " # predictedCells[c][i] represents the state of the i'th cell in the c'th\n", " # column. To see if a column is predicted, we can simply take the OR\n", " # across all the cells in that column. In numpy we can do this by taking\n", " # the max along axis 1.\n", " print \"\\n\\nThe following columns are predicted by the temporal memory. This\"\n", " print \"should correspond to columns in the *next* item in the sequence.\"\n", " predictedCells = tm.getPredictedState()\n", " print formatRow(predictedCells.max(axis=1).nonzero())" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Networks and Regions\n", "\n", "See slides." 
] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Online Prediction Framework\n", "\n", "* CLAModel\n", "* OPF Client\n", "* Swarming\n", "\n", "# CLAModel\n", "\n", "From `examples/opf/clients/hotgym/simple/hotgym.py`" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Model Parameters\n", "\n", "`MODEL_PARAMS` have all of the parameters for the CLA model and subcomponents" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "# Model Params!\n", "MODEL_PARAMS = {\n", " # Type of model that the rest of these parameters apply to.\n", " 'model': \"HTMPrediction\",\n", "\n", " # Version that specifies the format of the config.\n", " 'version': 1,\n", "\n", " # Intermediate variables used to compute fields in modelParams and also\n", " # referenced from the control section.\n", " 'aggregationInfo': { 'days': 0,\n", " 'fields': [('consumption', 'sum')],\n", " 'hours': 1,\n", " 'microseconds': 0,\n", " 'milliseconds': 0,\n", " 'minutes': 0,\n", " 'months': 0,\n", " 'seconds': 0,\n", " 'weeks': 0,\n", " 'years': 0},\n", "\n", " 'predictAheadTime': None,\n", "\n", " # Model parameter dictionary.\n", " 'modelParams': {\n", " # The type of inference that this model will perform\n", " 'inferenceType': 'TemporalMultiStep',\n", "\n", " 'sensorParams': {\n", " # Sensor diagnostic output verbosity control;\n", " # if > 0: sensor region will print out on screen what it's sensing\n", " # at each step 0: silent; >=1: some info; >=2: more info;\n", " # >=3: even more info (see compute() in py/regions/RecordSensor.py)\n", " 'verbosity' : 0,\n", "\n", " # Include the encoders we use\n", " 'encoders': {\n", " u'timestamp_timeOfDay': {\n", " 'fieldname': u'timestamp',\n", " 'name': u'timestamp_timeOfDay',\n", " 'timeOfDay': (21, 0.5),\n", " 'type': 'DateEncoder'\n", " },\n", " 
u'timestamp_dayOfWeek': None,\n", " u'timestamp_weekend': None,\n", " u'consumption': {\n", " 'clipInput': True,\n", " 'fieldname': u'consumption',\n", " 'maxval': 100.0,\n", " 'minval': 0.0,\n", " 'n': 50,\n", " 'name': u'c1',\n", " 'type': 'ScalarEncoder',\n", " 'w': 21\n", " },\n", " },\n", "\n", " # A dictionary specifying the period for automatically-generated\n", " # resets from a RecordSensor;\n", " #\n", " # None = disable automatically-generated resets (also disabled if\n", " # all of the specified values evaluate to 0).\n", " # Valid keys is the desired combination of the following:\n", " # days, hours, minutes, seconds, milliseconds, microseconds, weeks\n", " #\n", " # Example for 1.5 days: sensorAutoReset = dict(days=1,hours=12),\n", " #\n", " # (value generated from SENSOR_AUTO_RESET)\n", " 'sensorAutoReset' : None,\n", " },\n", "\n", " 'spEnable': True,\n", "\n", " 'spParams': {\n", " # SP diagnostic output verbosity control;\n", " # 0: silent; >=1: some info; >=2: more info;\n", " 'spVerbosity' : 0,\n", "\n", " # Spatial Pooler implementation selector, see getSPClass\n", " # in py/regions/SPRegion.py for details\n", " # 'py' (default), 'cpp' (speed optimized, new)\n", " 'spatialImp' : 'cpp',\n", "\n", " 'globalInhibition': 1,\n", "\n", " # Number of cell columns in the cortical region (same number for\n", " # SP and TM)\n", " # (see also tpNCellsPerCol)\n", " 'columnCount': 2048,\n", "\n", " 'inputWidth': 0,\n", "\n", " # SP inhibition control (absolute value);\n", " # Maximum number of active columns in the SP region's output (when\n", " # there are more, the weaker ones are suppressed)\n", " 'numActiveColumnsPerInhArea': 40,\n", "\n", " 'seed': 1956,\n", "\n", " # potentialPct\n", " # What percent of the columns's receptive field is available\n", " # for potential synapses. At initialization time, we will\n", " # choose potentialPct * (2*potentialRadius+1)^2\n", " 'potentialPct': 0.5,\n", "\n", " # The default connected threshold. 
Any synapse whose\n", " # permanence value is above the connected threshold is\n", " # a \"connected synapse\", meaning it can contribute to the\n", " # cell's firing. Typical value is 0.10. Cells whose activity\n", " # level before inhibition falls below minDutyCycleBeforeInh\n", " # will have their own internal synPermConnectedCell\n", " # threshold set below this default value.\n", " # (This concept applies to both SP and TM and so 'cells'\n", " # is correct here as opposed to 'columns')\n", " 'synPermConnected': 0.1,\n", "\n", " 'synPermActiveInc': 0.1,\n", "\n", " 'synPermInactiveDec': 0.005,\n", " },\n", "\n", " # Controls whether TM is enabled or disabled;\n", " # TM is necessary for making temporal predictions, such as predicting\n", " # the next inputs. Without TM, the model is only capable of\n", " # reconstructing missing sensor inputs (via SP).\n", " 'tmEnable' : True,\n", "\n", " 'tmParams': {\n", " # TM diagnostic output verbosity control;\n", " # 0: silent; [1..6]: increasing levels of verbosity\n", " # (see verbosity in nupic/trunk/py/nupic/research/TP.py and BacktrackingTMCPP.py)\n", " 'verbosity': 0,\n", "\n", " # Number of cell columns in the cortical region (same number for\n", " # SP and TM)\n", " # (see also tpNCellsPerCol)\n", " 'columnCount': 2048,\n", "\n", " # The number of cells (i.e., states), allocated per column.\n", " 'cellsPerColumn': 32,\n", "\n", " 'inputWidth': 2048,\n", "\n", " 'seed': 1960,\n", "\n", " # Temporal Pooler implementation selector (see _getTPClass in\n", " # CLARegion.py).\n", " 'temporalImp': 'cpp',\n", "\n", " # New Synapse formation count\n", " # NOTE: If None, use spNumActivePerInhArea\n", " #\n", " # TODO: need better explanation\n", " 'newSynapseCount': 20,\n", "\n", " # Maximum number of synapses per segment\n", " # > 0 for fixed-size CLA\n", " # -1 for non-fixed-size CLA\n", " #\n", " # TODO: for Ron: once the appropriate value is placed in TP\n", " # constructor, see if we should eliminate this parameter 
from\n", " # description.py.\n", " 'maxSynapsesPerSegment': 32,\n", "\n", " # Maximum number of segments per cell\n", " # > 0 for fixed-size CLA\n", " # -1 for non-fixed-size CLA\n", " #\n", " # TODO: for Ron: once the appropriate value is placed in TP\n", " # constructor, see if we should eliminate this parameter from\n", " # description.py.\n", " 'maxSegmentsPerCell': 128,\n", "\n", " # Initial Permanence\n", " # TODO: need better explanation\n", " 'initialPerm': 0.21,\n", "\n", " # Permanence Increment\n", " 'permanenceInc': 0.1,\n", "\n", " # Permanence Decrement\n", " # If set to None, will automatically default to tpPermanenceInc\n", " # value.\n", " 'permanenceDec' : 0.1,\n", "\n", " 'globalDecay': 0.0,\n", "\n", " 'maxAge': 0,\n", "\n", " # Minimum number of active synapses for a segment to be considered\n", " # during search for the best-matching segments.\n", " # None=use default\n", " # Replaces: tpMinThreshold\n", " 'minThreshold': 9,\n", "\n", " # Segment activation threshold.\n", " # A segment is active if it has >= tpSegmentActivationThreshold\n", " # connected synapses that are active due to infActiveState\n", " # None=use default\n", " # Replaces: tpActivationThreshold\n", " 'activationThreshold': 12,\n", "\n", " 'outputType': 'normal',\n", "\n", " # \"Pay Attention Mode\" length. This tells the TM how many new\n", " # elements to append to the end of a learned sequence at a time.\n", " # Smaller values are better for datasets with short sequences,\n", " # higher values are better for datasets with long sequences.\n", " 'pamLength': 1,\n", " },\n", "\n", " 'clParams': {\n", " 'regionName' : 'SDRClassifierRegion',\n", "\n", " # Classifier diagnostic output verbosity control;\n", " # 0: silent; [1..6]: increasing levels of verbosity\n", " 'verbosity' : 0,\n", "\n", " # This controls how fast the classifier learns/forgets. 
Higher values\n", " # make it adapt faster and forget older patterns faster.\n", " 'alpha': 0.005,\n", "\n", " # This is set after the call to updateConfigFromSubConfig and is\n", " # computed from the aggregationInfo and predictAheadTime.\n", " 'steps': '1,5',\n", "\n", " 'implementation': 'cpp',\n", " },\n", "\n", " 'trainSPNetOnlyIfRequested': False,\n", " },\n", "}" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Dataset Helpers" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/Users/mleborgne/_git/nupic/src/nupic/datafiles/extra/hotgym/hotgym.csv\n", "\n", "gym,address,timestamp,consumption\n", "string,string,datetime,float\n", "S,,T,\n", "Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:00:00.0,5.3\n", "Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:15:00.0,5.5\n", "Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:30:00.0,5.1\n", "Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 00:45:00.0,5.3\n", "Balgowlah Platinum,Shop 67 197-215 Condamine Street Balgowlah 2093,2010-07-02 01:00:00.0,5.2\n" ] } ], "source": [ "from pkg_resources import resource_filename\n", "\n", "datasetPath = resource_filename(\"nupic.datafiles\", \"extra/hotgym/hotgym.csv\")\n", "print datasetPath\n", "\n", "with open(datasetPath) as inputFile:\n", " print\n", " for _ in xrange(8):\n", " print inputFile.next().strip()" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Loading Data\n", "\n", "`FileRecordStream` - file reader for the NuPIC file format (CSV with three header rows, understands datetimes)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false, "deletable": true, "editable": true 
}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 0), 5.3]\n", "['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 15), 5.5]\n", "['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 30), 5.1]\n", "['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 0, 45), 5.3]\n", "['Balgowlah Platinum', 'Shop 67 197-215 Condamine Street Balgowlah 2093', datetime.datetime(2010, 7, 2, 1, 0), 5.2]\n" ] } ], "source": [ "from nupic.data.file_record_stream import FileRecordStream\n", "\n", "def getData():\n", " return FileRecordStream(datasetPath)\n", "\n", "data = getData()\n", "for _ in xrange(5):\n", " print data.next()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from nupic.frameworks.opf.model_factory import ModelFactory\n", "model = ModelFactory.create(MODEL_PARAMS)\n", "model.enableInference({'predictedField': 'consumption'})" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "input: 5.3\n", "prediction: 5.3\n", "input: 5.5\n", "prediction: 5.5\n", "input: 5.1\n", "prediction: 5.36\n", "input: 5.3\n", "prediction: 5.1\n", "input: 5.2\n", "prediction: 5.342\n", "input: 5.5\n", "prediction: 5.2994\n", "input: 4.5\n", "prediction: 5.35958\n", "input: 1.2\n", "prediction: 4.92\n", "input: 1.1\n", "prediction: 1.2\n", "input: 1.2\n", "prediction: 1.17\n", "input: 1.2\n", "prediction: 1.179\n", "input: 1.2\n", "prediction: 1.1853\n", "input: 1.2\n", "prediction: 1.18971\n", "input: 1.2\n", "prediction: 1.192797\n", "input: 1.1\n", "prediction: 
1.1949579\n", "input: 1.2\n", "prediction: 1.16647053\n", "input: 1.1\n", "prediction: 1.176529371\n", "input: 1.2\n", "prediction: 1.1535705597\n", "input: 1.2\n", "prediction: 1.16749939179\n", "input: 1.1\n", "prediction: 1.17724957425\n", "input: 1.2\n", "prediction: 1.15407470198\n", "input: 6.0\n", "prediction: 1.16785229138\n", "input: 7.9\n", "prediction: 5.551706\n", "input: 8.4\n", "prediction: 6.2561942\n", "input: 10.6\n", "prediction: 6.89933594\n", "input: 12.4\n", "prediction: 10.6\n", "input: 12.1\n", "prediction: 12.4\n", "input: 12.4\n", "prediction: 12.31\n", "input: 11.4\n", "prediction: 12.337\n", "input: 11.2\n", "prediction: 10.84\n", "input: 10.8\n", "prediction: 10.948\n", "input: 12.0\n", "prediction: 10.9036\n", "input: 11.8\n", "prediction: 11.23252\n", "input: 11.9\n", "prediction: 11.402764\n", "input: 11.4\n", "prediction: 11.5519348\n", "input: 11.0\n", "prediction: 11.50635436\n", "input: 9.8\n", "prediction: 11.354448052\n", "input: 9.8\n", "prediction: 10.8881136364\n", "input: 10.8\n", "prediction: 10.5616795455\n", "input: 11.1\n", "prediction: 10.6331756818\n", "input: 11.1\n", "prediction: 10.7732229773\n", "input: 11.0\n", "prediction: 10.8712560841\n", "input: 10.7\n", "prediction: 10.9098792589\n", "input: 10.6\n", "prediction: 10.8469154812\n", "input: 10.3\n", "prediction: 10.7728408368\n", "input: 10.1\n", "prediction: 10.6309885858\n", "input: 12.9\n", "prediction: 10.4716920101\n", "input: 10.5\n", "prediction: 10.4716920101\n", "input: 9.7\n", "prediction: 10.480184407\n", "input: 9.7\n", "prediction: 10.2461290849\n", "input: 9.2\n", "prediction: 10.0822903594\n", "input: 9.2\n", "prediction: 9.81760325161\n", "input: 9.2\n", "prediction: 9.63232227613\n", "input: 9.3\n", "prediction: 9.50262559329\n", "input: 9.1\n", "prediction: 9.4418379153\n", "input: 9.0\n", "prediction: 9.33928654071\n", "input: 8.9\n", "prediction: 9.2375005785\n", "input: 9.0\n", "prediction: 9.13625040495\n", "input: 8.9\n", "prediction: 
9.09537528346\n", "input: 8.9\n", "prediction: 9.03676269843\n", "input: 9.0\n", "prediction: 8.9957338889\n", "input: 9.2\n", "prediction: 8.99701372223\n", "input: 10.0\n", "prediction: 9.05790960556\n", "input: 10.7\n", "prediction: 9.34053672389\n", "input: 8.9\n", "prediction: 9.74837570672\n", "input: 9.0\n", "prediction: 9.49386299471\n", "input: 9.0\n", "prediction: 9.34570409629\n", "input: 9.3\n", "prediction: 9.24199286741\n", "input: 9.3\n", "prediction: 9.25939500718\n", "input: 9.1\n", "prediction: 9.27157650503\n", "input: 9.1\n", "prediction: 9.22010355352\n", "input: 9.1\n", "prediction: 9.18407248746\n", "input: 9.2\n", "prediction: 9.15885074122\n", "input: 9.4\n", "prediction: 9.17119551886\n", "input: 9.3\n", "prediction: 9.2398368632\n", "input: 9.3\n", "prediction: 9.25788580424\n", "input: 9.1\n", "prediction: 9.27052006297\n", "input: 9.1\n", "prediction: 9.21936404408\n", "input: 11.0\n", "prediction: 9.18355483085\n", "input: 9.0\n", "prediction: 9.7284883816\n", "input: 8.6\n", "prediction: 9.50994186712\n", "input: 3.0\n", "prediction: 9.50994186712\n", "input: 1.3\n", "prediction: 4.344\n", "input: 1.2\n", "prediction: 1.20749660397\n", "input: 1.3\n", "prediction: 1.20524762278\n", "input: 1.3\n", "prediction: 1.23367333594\n", "input: 1.3\n", "prediction: 1.25357133516\n", "input: 1.2\n", "prediction: 1.26749993461\n", "input: 1.3\n", "prediction: 1.24724995423\n", "input: 1.2\n", "prediction: 1.26307496796\n", "input: 1.3\n", "prediction: 1.24415247757\n", "input: 1.2\n", "prediction: 1.2609067343\n", "input: 1.3\n", "prediction: 1.24263471401\n", "input: 1.2\n", "prediction: 1.25984429981\n", "input: 1.1\n", "prediction: 1.24189100987\n", "input: 2.3\n", "prediction: 1.19932370691\n", "input: 5.5\n", "prediction: 3.7308\n", "input: 5.5\n", "prediction: 6.8366746106\n", "input: 5.8\n", "prediction: 6.43567222742\n", "input: 5.7\n", "prediction: 6.24497055919\n" ] } ], "source": [ "data = getData()\n", "for _ in xrange(100):\n", " 
record = dict(zip(data.getFieldNames(), data.next()))\n", " print \"input: \", record[\"consumption\"]\n", " result = model.run(record)\n", " print \"prediction: \", result.inferences[\"multiStepBestPredictions\"][1]" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5-step prediction: 1.19932370691\n" ] } ], "source": [ "print \"5-step prediction: \", result.inferences[\"multiStepBestPredictions\"][5]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true }, "source": [ "# Anomaly Score" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "# Model Params!\n", "MODEL_PARAMS = {\n", " # Type of model that the rest of these parameters apply to.\n", " 'model': \"HTMPrediction\",\n", "\n", " # Version that specifies the format of the config.\n", " 'version': 1,\n", "\n", " # Intermediate variables used to compute fields in modelParams and also\n", " # referenced from the control section.\n", " 'aggregationInfo': { 'days': 0,\n", " 'fields': [('consumption', 'sum')],\n", " 'hours': 1,\n", " 'microseconds': 0,\n", " 'milliseconds': 0,\n", " 'minutes': 0,\n", " 'months': 0,\n", " 'seconds': 0,\n", " 'weeks': 0,\n", " 'years': 0},\n", "\n", " 'predictAheadTime': None,\n", "\n", " # Model parameter dictionary.\n", " 'modelParams': {\n", " # The type of inference that this model will perform\n", " 'inferenceType': 'TemporalAnomaly',\n", "\n", " 'sensorParams': {\n", " # Sensor diagnostic output verbosity control;\n", " # if > 0: sensor region will print out on screen what it's sensing\n", " # at each step 0: silent; >=1: some info; >=2: more info;\n", " # >=3: even more info (see compute() in py/regions/RecordSensor.py)\n", " 'verbosity' : 0,\n", "\n", " # Include the encoders we use\n", " 'encoders': {\n", " 
u'timestamp_timeOfDay': {\n", " 'fieldname': u'timestamp',\n", " 'name': u'timestamp_timeOfDay',\n", " 'timeOfDay': (21, 0.5),\n", " 'type': 'DateEncoder'},\n", " u'timestamp_dayOfWeek': None,\n", " u'timestamp_weekend': None,\n", " u'consumption': {\n", " 'clipInput': True,\n", " 'fieldname': u'consumption',\n", " 'maxval': 100.0,\n", " 'minval': 0.0,\n", " 'n': 50,\n", " 'name': u'c1',\n", " 'type': 'ScalarEncoder',\n", " 'w': 21},},\n", "\n", " # A dictionary specifying the period for automatically-generated\n", " # resets from a RecordSensor;\n", " #\n", " # None = disable automatically-generated resets (also disabled if\n", " # all of the specified values evaluate to 0).\n", " # Valid keys are any combination of the following:\n", " # days, hours, minutes, seconds, milliseconds, microseconds, weeks\n", " #\n", " # Example for 1.5 days: sensorAutoReset = dict(days=1,hours=12),\n", " #\n", " # (value generated from SENSOR_AUTO_RESET)\n", " 'sensorAutoReset' : None,\n", " },\n", "\n", " 'spEnable': True,\n", "\n", " 'spParams': {\n", " # SP diagnostic output verbosity control;\n", " # 0: silent; >=1: some info; >=2: more info;\n", " 'spVerbosity' : 0,\n", "\n", " # Spatial Pooler implementation selector, see getSPClass\n", " # in py/regions/SPRegion.py for details\n", " # 'py' (default), 'cpp' (speed optimized, new)\n", " 'spatialImp' : 'cpp',\n", "\n", " 'globalInhibition': 1,\n", "\n", " # Number of cell columns in the cortical region (same number for\n", " # SP and TM)\n", " # (see also tpNCellsPerCol)\n", " 'columnCount': 2048,\n", "\n", " 'inputWidth': 0,\n", "\n", " # SP inhibition control (absolute value);\n", " # Maximum number of active columns in the SP region's output (when\n", " # there are more, the weaker ones are suppressed)\n", " 'numActiveColumnsPerInhArea': 40,\n", "\n", " 'seed': 1956,\n", "\n", " # potentialPct\n", " # What percent of the column's receptive field is available\n", " # for potential synapses.
At initialization time, we will\n", " # choose potentialPct * (2*potentialRadius+1)^2\n", " 'potentialPct': 0.5,\n", "\n", " # The default connected threshold. Any synapse whose\n", " # permanence value is above the connected threshold is\n", " # a \"connected synapse\", meaning it can contribute to the\n", " # cell's firing. Typical value is 0.10. Cells whose activity\n", " # level before inhibition falls below minDutyCycleBeforeInh\n", " # will have their own internal synPermConnectedCell\n", " # threshold set below this default value.\n", " # (This concept applies to both SP and TM and so 'cells'\n", " # is correct here as opposed to 'columns')\n", " 'synPermConnected': 0.1,\n", "\n", " 'synPermActiveInc': 0.1,\n", "\n", " 'synPermInactiveDec': 0.005,\n", " },\n", "\n", " # Controls whether TM is enabled or disabled;\n", " # TM is necessary for making temporal predictions, such as predicting\n", " # the next inputs. Without TM, the model is only capable of\n", " # reconstructing missing sensor inputs (via SP).\n", " 'tmEnable' : True,\n", "\n", " 'tmParams': {\n", " # TM diagnostic output verbosity control;\n", " # 0: silent; [1..6]: increasing levels of verbosity\n", " # (see verbosity in nupic/trunk/py/nupic/research/TP.py and BacktrackingTMCPP.py)\n", " 'verbosity': 0,\n", "\n", " # Number of cell columns in the cortical region (same number for\n", " # SP and TM)\n", " # (see also tpNCellsPerCol)\n", " 'columnCount': 2048,\n", "\n", " # The number of cells (i.e., states), allocated per column.\n", " 'cellsPerColumn': 32,\n", "\n", " 'inputWidth': 2048,\n", "\n", " 'seed': 1960,\n", "\n", " # Temporal Pooler implementation selector (see _getTPClass in\n", " # CLARegion.py).\n", " 'temporalImp': 'cpp',\n", "\n", " # New Synapse formation count\n", " # NOTE: If None, use spNumActivePerInhArea\n", " #\n", " # TODO: need better explanation\n", " 'newSynapseCount': 20,\n", "\n", " # Maximum number of synapses per segment\n", " # > 0 for fixed-size CLA\n", " # -1
for non-fixed-size CLA\n", " #\n", " # TODO: for Ron: once the appropriate value is placed in TP\n", " # constructor, see if we should eliminate this parameter from\n", " # description.py.\n", " 'maxSynapsesPerSegment': 32,\n", "\n", " # Maximum number of segments per cell\n", " # > 0 for fixed-size CLA\n", " # -1 for non-fixed-size CLA\n", " #\n", " # TODO: for Ron: once the appropriate value is placed in TP\n", " # constructor, see if we should eliminate this parameter from\n", " # description.py.\n", " 'maxSegmentsPerCell': 128,\n", "\n", " # Initial Permanence\n", " # TODO: need better explanation\n", " 'initialPerm': 0.21,\n", "\n", " # Permanence Increment\n", " 'permanenceInc': 0.1,\n", "\n", " # Permanence Decrement\n", " # If set to None, will automatically default to tpPermanenceInc\n", " # value.\n", " 'permanenceDec' : 0.1,\n", "\n", " 'globalDecay': 0.0,\n", "\n", " 'maxAge': 0,\n", "\n", " # Minimum number of active synapses for a segment to be considered\n", " # during search for the best-matching segments.\n", " # None=use default\n", " # Replaces: tpMinThreshold\n", " 'minThreshold': 9,\n", "\n", " # Segment activation threshold.\n", " # A segment is active if it has >= tpSegmentActivationThreshold\n", " # connected synapses that are active due to infActiveState\n", " # None=use default\n", " # Replaces: tpActivationThreshold\n", " 'activationThreshold': 12,\n", "\n", " 'outputType': 'normal',\n", "\n", " # \"Pay Attention Mode\" length. 
This tells the TM how many new\n", " # elements to append to the end of a learned sequence at a time.\n", " # Smaller values are better for datasets with short sequences,\n", " # higher values are better for datasets with long sequences.\n", " 'pamLength': 1,\n", " },\n", "\n", " 'clParams': {\n", " 'regionName' : 'SDRClassifierRegion',\n", "\n", " # Classifier diagnostic output verbosity control;\n", " # 0: silent; [1..6]: increasing levels of verbosity\n", " 'verbosity' : 0,\n", "\n", " # This controls how fast the classifier learns/forgets. Higher values\n", " # make it adapt faster and forget older patterns faster.\n", " 'alpha': 0.005,\n", "\n", " # This is set after the call to updateConfigFromSubConfig and is\n", " # computed from the aggregationInfo and predictAheadTime.\n", " 'steps': '1',\n", "\n", " 'implementation': 'cpp',\n", " },\n", "\n", " 'anomalyParams': {\n", " u'anomalyCacheRecords': None,\n", " u'autoDetectThreshold': None,\n", " u'autoDetectWaitRecords': 2184\n", " },\n", "\n", " 'trainSPNetOnlyIfRequested': False,\n", " },\n", "}" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "from nupic.frameworks.opf.model_factory import ModelFactory\n", "model = ModelFactory.create(MODEL_PARAMS)\n", "model.enableInference({'predictedField': 'consumption'})" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "input: 5.3\n", "prediction: 5.3\n", "input: 5.5\n", "prediction: 5.5\n", "input: 5.1\n", "prediction: 5.36\n", "input: 5.3\n", "prediction: 5.1\n", "input: 5.2\n", "prediction: 5.342\n" ] } ], "source": [ "data = getData()\n", "for _ in xrange(5):\n", " record = dict(zip(data.getFieldNames(), data.next()))\n", " print \"input: \", record[\"consumption\"]\n", " result = model.run(record)\n", " print 
\"prediction: \", result.inferences[\"multiStepBestPredictions\"][1]" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ModelResult(\tpredictionNumber=4\n", "\trawInput={'timestamp': datetime.datetime(2010, 7, 2, 1, 0), 'gym': 'Balgowlah Platinum', 'consumption': 5.2, 'address': 'Shop 67 197-215 Condamine Street Balgowlah 2093'}\n", "\tsensorInput=SensorInput(\tdataRow=(5.2, 1.0)\n", "\tdataDict={'timestamp': datetime.datetime(2010, 7, 2, 1, 0), 'gym': 'Balgowlah Platinum', 'consumption': 5.2, 'address': 'Shop 67 197-215 Condamine Street Balgowlah 2093'}\n", "\tdataEncodings=[array([ 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n", " 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32), array([ 0., 0., 0., ..., 0., 0., 0.], dtype=float32)]\n", "\tsequenceReset=0.0\n", "\tcategory=-1\n", ")\n", "\tinferences={'multiStepPredictions': {1: {5.1: 0.0088801263517415546, 5.2: 0.010775254623541418, 5.341999999999999: 0.98034461902471692}}, 'multiStepBucketLikelihoods': {1: {1: 0.0088801263517415546, 2: 0.98034461902471692}}, 'multiStepBestPredictions': {1: 5.341999999999999}, 'anomalyLabel': '[]', 'anomalyScore': 0.40000001}\n", "\tmetrics=None\n", "\tpredictedFieldIdx=0\n", "\tpredictedFieldName=consumption\n", "\tclassifierInput=ClassifierInput(\tdataRow=5.2\n", "\tbucketIndex=2\n", ")\n", ")\n" ] } ], "source": [ "print result" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "anomaly score: 0.4\n" ] } ], "source": [ "print \"anomaly score: \", result.inferences[\"anomalyScore\"]" ] }, { "cell_type": "markdown", "metadata": { "deletable": true, "editable": true 
}, "source": [ "__See Subutai's talk for more info on anomaly detection!__\n", "\n", "# Built-in OPF Clients\n", "\n", "`python examples/opf/bin/OpfRunExperiment.py examples/opf/experiments/multistep/hotgym/`\n", "\n", "Outputs `examples/opf/experiments/multistep/hotgym/inference/DefaultTask.TemporalMultiStep.predictionLog.csv`\n", "\n", "`python bin/run_swarm.py examples/opf/experiments/multistep/hotgym/permutations.py`\n", "\n", "Outputs `examples/opf/experiments/multistep/hotgym/model_0/description.py`" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "deletable": true, "editable": true }, "outputs": [], "source": [ "" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2.0 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }