{ "metadata": { "name": "PickleRedis" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [
{ "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Pickle Data and store in Redis - Python" ] },
{ "cell_type": "code", "collapsed": false, "input": [ "import pickle" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 },
{ "cell_type": "code", "collapsed": false, "input": [ "data1 = {'firstname': [1, 2.0, 3, 4],\n", "         'lastname': ('maiden', u'Smith'),\n", "         'account': None}\n", "\n", "# Context managers close (and flush) each handle before the file is reopened.\n", "with open('data.pkl', 'wb') as pkl_file:\n", "    pickle.dump(data1, pkl_file)\n", "\n", "with open('data.pkl', 'rb') as pkl_file:\n", "    data2 = pickle.load(pkl_file)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 },
{ "cell_type": "code", "collapsed": false, "input": [ "data1 == data2" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 3, "text": [ "True" ] } ], "prompt_number": 3 },
{ "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Pickle a Person" ] },
{ "cell_type": "code", "collapsed": false, "input": [ "class PicklePerson(object):\n", "    \"\"\"Simple record (name, age, location) used to demonstrate pickling.\"\"\"\n", "\n", "    def __init__(self, name, age, location):\n", "        self.name = name\n", "        self.age = age\n", "        self.location = location\n", "\n", "    def __repr__(self):\n", "        # %s formatting converts each field, so a non-string age (e.g. 60)\n", "        # no longer raises TypeError as str + int concatenation did.\n", "        return \"name: %s\\nage: %s\\nlocation: %s\" % (\n", "            self.name, self.age, self.location)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 },
{ "cell_type": "code", "collapsed": false, "input": [ "john = PicklePerson(\"John\", \"60\", \"Smith\")\n", "print john" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "name: John\n", "age: 60\n", "location: Smith\n" ] } ], "prompt_number": 5 },
{ "cell_type": "code", "collapsed": false, "input": [ "with open(\"pickle_john\", \"wb\") as pkl_file:\n", "    pickle.dump(john, pkl_file)\n", "\n", "# Read in binary mode to match the 'wb' mode used for writing;\n", "# mixing text and binary modes for one pickle file is not portable.\n", "with open(\"pickle_john\", \"rb\") as pkl_file:\n", "    zombie_john = pickle.load(pkl_file)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 },
{ "cell_type": "code", "collapsed": false, "input": [ "zombie_john" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 7, "text": [ "name: John\n", "age: 60\n", "location: Smith" ] } ], "prompt_number": 7 },
{ "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "cPickle" ] },
{ "cell_type": "code", "collapsed": false, "input": [ "import cPickle, os" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 },
{ "cell_type": "code", "collapsed": false, "input": [ "%timeit pickle.dump([data1 for x in xrange(1000)], open(\"pickle_john\", \"wb\"))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "100 loops, best of 3: 5.57 ms per loop\n" ] } ], "prompt_number": 9 },
{ "cell_type": "code", "collapsed": false, "input": [ "%timeit cPickle.dump([data1 for x in xrange(1000)], open(\"pickle_john\", \"wb\"))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "1000 loops, best of 3: 1.48 ms per loop\n" ] } ], "prompt_number": 10 },
{ "cell_type": "code", "collapsed": false, "input": [ "# Use the same relative path the cells above wrote to, so this runs on any machine.\n", "os.path.getsize('pickle_john')" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 11, "text": [ "4100" ] } ], "prompt_number": 11 },
{ "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Redis - Images and Pickled Python objects under 512 MB" ] },
{ "cell_type": "heading", "level": 4, "metadata": {}, "source": [ "A Python interface to Redis is available at https://github.com/andymccurdy/redis-py" ] },
{ "cell_type": "code", "collapsed": false, "input": [ "import redis\n", "r = redis.StrictRedis(host='localhost', port=6379, db=0)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 },
{ "cell_type": "code", "collapsed": false, "input": [ "r.set('foo', 
'bar')" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 13, "text": [ "True" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "r.get('foo')" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 14, "text": [ "'bar'" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "%timeit r.set('foo', 'bar')" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "10000 loops, best of 3: 141 us per loop\n" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "%timeit r.get('foo')" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "10000 loops, best of 3: 123 us per loop\n" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "import string\n", "\n", "text = \"\"\"Pipelines are a subclass of the base Redis class that provide support \n", "for buffering multiple commands to the server in a single request. 
They can be \n", "used to dramatically increase the performance of groups of commands by reducing \n", "the number of back-and-forth TCP packets between the client and server.\"\"\"\n" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 17 },
{ "cell_type": "code", "collapsed": false, "input": [ "for word in text.split():\n", " r.sadd(\"persistent\", word)\n", " \n", "print [r.srandmember('persistent') for i in xrange(10)]\n", "print [r.srandmember('persistent') for i in xrange(10)]" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "['p', 'P', 'increase', 'and', 'commands', 'for', 'single', 'f', 'a', 'u']\n", "['for', 'the', 'and', 'the', 'dramatically', 'be', 'f', 'R', 'used', 'packets']\n" ] } ], "prompt_number": 18 },
{ "cell_type": "code", "collapsed": false, "input": [ "words = text.split()\n", "\n", "# INCR accumulates across runs, so clear the per-word counters first;\n", "# otherwise every re-run of this cell inflates the counts.\n", "r.delete(*set(words))\n", "for word in words:\n", "    r.incr(word)\n", "\n", "for word in set(words):\n", "    count = int(r.get(word))\n", "    if count > 2:\n", "        print word\n", "        print \"\\t\\t\", count" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 19 },
{ "cell_type": "code", "collapsed": false, "input": [ "bob = pickle.dumps(PicklePerson(\"bob\",\"50\",\"smith\"))\n", "print bob" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "ccopy_reg\n", "_reconstructor\n", "p0\n", "(c__main__\n", "PicklePerson\n", "p1\n", "c__builtin__\n", "object\n", "p2\n", "Ntp3\n", "Rp4\n", "(dp5\n", "S'age'\n", "p6\n", "S'50'\n", "p7\n", "sS'name'\n", "p8\n", "S'bob'\n", "p9\n", "sS'location'\n", "p10\n", "S'smith'\n", "p11\n", "sb.\n" ] } ], "prompt_number": 20 },
{ "cell_type": "code", "collapsed": false, "input": [ "r.set(\"bob\", bob)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 21, "text": [ "True" ] } ], "prompt_number": 21 },
{ "cell_type": "code", "collapsed": false, "input": [ "# Unpickling can execute arbitrary code: only load values this notebook stored itself.\n", "pickle.loads(r.get(\"bob\"))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 22, "text": [ "name: bob\n", "age: 50\n", "location: smith" ] } ], "prompt_number": 22 },
{ "cell_type": "code", "collapsed": false, "input": [ "import redisco\n", "from redisco import connection_setup, models\n", "redisco.connection_setup(host='localhost', port=6379, db=0)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 23 },
{ "cell_type": "code", "collapsed": false, "input": [ "# redisco model of a person; only `name` is required.\n", "class Person(models.Model):\n", "    name = models.Attribute(required=True)\n", "    age = models.Attribute(required=False)\n", "    location = models.Attribute(required=False)\n", "\n", "# Delete any stored \"Tim\" records so re-running the notebook starts clean.\n", "for person in Person.objects.filter(name=\"Tim\"):\n", "    person.delete()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 25 },
{ "cell_type": "code", "collapsed": false, "input": [ "tim_smith = Person(name=\"Tim\",age=\"40\",location=\"Vancouver\")\n", "tim_smiths = Person(name=\"Tim\",age=\"70\",location=\"Burnaby\")" ], "language": "python", 
"metadata": {}, "outputs": [], "prompt_number": 26 }, { "cell_type": "code", "collapsed": false, "input": [ "tim_smiths.save()\n", "tim_smith.save()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 27, "text": [ "True" ] } ], "prompt_number": 27 }, { "cell_type": "code", "collapsed": false, "input": [ "Person.objects.filter(name=\"Tim\")" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 28, "text": [ "[, ]" ] } ], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "Person.objects.filter(name=\"Tim\", age=\"40\")[0] == tim_smith" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 29, "text": [ "True" ] } ], "prompt_number": 29 } ], "metadata": {} } ] }