{ "metadata": { "name": "" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "TextBlob: Text Analytics for Humans\n", "========\n", "\n", "# _Ken Hu_\n", "\n", "# _@whosbacon_" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "fragment" } }, "source": [ "## Who Am I?" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "notes" } }, "source": [ "+ 7+ years DevOps\n", "+ 5+ years Python\n", "+ 3+ years text analytics\n", "+ Digital nomad" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# I keep this as a cell in my title slide so I can rerun \n", "# it easily if I make changes, but it's low enough it won't\n", "# be visible in presentation mode.\n", "%run talktools" ], "language": "python", "metadata": { "slideshow": { "slide_type": "notes" } }, "outputs": [ { "html": [ "\n" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] } ], "prompt_number": 1 }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Itinerary\n", "\n", "+ A blob of text\n", "+ Basics\n", "+ Tokenization\n", "+ Parsing & tagging\n", "+ Spell correction\n", "+ Sentiment analysis\n", "+ Classification\n", "+ Blobber" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## A Blob of Text\n", "\n", "+ Popularity of text analytics / NLP\n", "+ nltk complexity\n", "+ TextBlob : nltk :: requests : Httplib2\n", "+ Object-oriented\n", "+ https://textblob.readthedocs.org/en/latest/\n", "+ https://github.com/sloria/textblob" ] }, { "cell_type": "code", "collapsed": false, "input": [ "!sudo pip install -U textblob\n", "!sudo pip install -U nltk textblob-aptagger\n", "!sudo python -m textblob.download_corpora\n", "\n", "import nltk\n", "nltk.download('wordnet')" ], "language": "python", "metadata": { "slideshow": { "slide_type": 
"fragment" } }, "outputs": [] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Basics" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import textblob\n", "\n", "blob = textblob.TextBlob(u' Welcome to textblob: text analytics for humans ')" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "print type(blob)\n", "print blob[5:10]\n", "print blob.title()\n", "print blob.find('text')\n", "print blob.endswith(' ')\n", "print blob.stripped" ], "language": "python", "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "lcome\n", " Welcome To Textblob: Text Analytics For Humans \n", "14\n", "True\n", "welcome to textblob text analytics for humans\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "print textblob.Word\n", "print textblob.Word(u'sweet').pluralize()\n", "print textblob.Word(u'sweets').singularize()\n", "print textblob.Word(u'running').lemmatize('v')" ], "language": "python", "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "sweets\n", "sweet\n", "run" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "print type(blob.words)\n", "print blob.words.singularize()\n", "print blob.words.count('text')" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "[u'Welcome', u'to', u'textblob', u'text', u'analytic', u'for', u'human']\n", "1\n" ] } ], "prompt_number": 4 }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Tokenization" 
] }, { "cell_type": "code", "collapsed": false, "input": [ "blob = textblob.TextBlob(u'why is it so hot in here?!\\nthis is not hot at all')" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "print blob.words\n", "print type(blob.words), type(blob.words[0])\n", "print blob.sentences\n", "print blob.ngrams()\n", "print blob.word_counts['hot']" ], "language": "python", "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[u'why', u'is', u'it', u'so', u'hot', u'in', u'here', u'this', u'is', u'not', u'hot', u'at', u'all']\n", " \n", "[Sentence(\"why is it so hot in here?!\"), Sentence(\"this is not hot at all\")]\n", "[WordList([u'why', u'is', u'it']), WordList([u'is', u'it', u'so']), WordList([u'it', u'so', u'hot']), WordList([u'so', u'hot', u'in']), WordList([u'hot', u'in', u'here']), WordList([u'in', u'here', u'this']), WordList([u'here', u'this', u'is']), WordList([u'this', u'is', u'not']), WordList([u'is', u'not', u'hot']), WordList([u'not', u'hot', u'at']), WordList([u'hot', u'at', u'all'])]\n", "2\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "print blob.tokenizer\n", "\n", "import nltk.tokenize\n", "\n", "blob.tokenizer = nltk.tokenize.LineTokenizer()\n", "print blob.tokenize()\n", "print blob.tokenize(nltk.tokenize.PunktWordTokenizer())" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "[u'why is it so hot in here?!', u'this is not hot at all']\n", "[u'why', u'is', u'it', u'so', u'hot', u'in', u'here', u'?', u'!', u'this', u'is', u'not', u'hot', u'at', u'all']\n" ] } ], "prompt_number": 7 }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Parsing & Tagging" ] }, 
{ "cell_type": "code", "collapsed": false, "input": [ "blob = textblob.TextBlob(u'Ray Charles has got Georgia on his mind')" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "print blob.noun_phrases\n", "print blob.pos_tags\n", "print blob.parse()" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[u'ray charles', u'georgia']\n", "[(u'Ray', u'NNP'), (u'Charles', u'NNP'), (u'has', u'VBZ'), (u'got', u'VBD'), (u'Georgia', u'NNP'), (u'on', u'IN'), (u'his', u'PRP$'), (u'mind', u'NN')]" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Ray/NNP/B-NP/O Charles/NNP/I-NP/O has/VBZ/B-VP/O got/VBD/I-VP/O Georgia/NNP/B-NP/O on/IN/B-PP/B-PNP his/PRP$/B-NP/I-PNP mind/NN/I-NP/I-PNP\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "import textblob.np_extractors\n", "\n", "print blob.np_extractor\n", "blob = textblob.TextBlob(u'Ray Charles has got Georgia on his mind',\n", " np_extractor=textblob.np_extractors.ConllExtractor())\n", "print blob.noun_phrases" ], "language": "python", "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "[u'ray charles', u'georgia']" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "import textblob.taggers\n", "\n", "print blob.pos_tagger\n", "blob = textblob.TextBlob(u'Ray Charles has got Georgia on his mind',\n", " pos_tagger=textblob.taggers.NLTKTagger())\n", "print blob.pos_tags" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "[(u'Ray', u'NNP'), (u'Charles', u'NNP'), (u'has', 
u'VBZ'), (u'got', u'VBN'), (u'Georgia', u'NNP'), (u'on', u'IN'), (u'his', u'PRP$'), (u'mind', u'NN')]" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 11 }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Spell Correction" ] }, { "cell_type": "code", "collapsed": false, "input": [ "blob = textblob.TextBlob(u'I am runing across the boarder')" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "print blob.correct() # based on Pattern\n", "print blob.words[2].spellcheck()" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "I am running across the border\n", "[(u'running', 0.8974358974358975), (u'ruling', 0.07692307692307693), (u'ruining', 0.019230769230769232), (u'tuning', 0.00641025641025641)]\n" ] } ], "prompt_number": 13 }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Sentiment Analysis" ] }, { "cell_type": "code", "collapsed": false, "input": [ "blob = textblob.TextBlob(u'the weather is fantastic!')" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "print blob.sentiment\n", "\n", "import textblob.sentiments\n", "\n", "print blob.analyzer\n", "blob.analyzer = textblob.sentiments.NaiveBayesAnalyzer()\n", "print blob.sentiment" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Sentiment(polarity=0.5, subjectivity=0.9)\n", "\n", "Sentiment(polarity=0.5, subjectivity=0.9)\n" ] } ], "prompt_number": 15 }, { "cell_type": "markdown", "metadata": { "slideshow": { 
"slide_type": "slide" } }, "source": [ "## Classification\n", "\n", "### Opposite Day Classifier" ] }, { "cell_type": "code", "collapsed": false, "input": [ "training = [\n", " (u'tobey maguire is a terrible spiderman.','pos'),\n", " (u'a terrible Javert (Russell Crowe) ruined Les Miserables for me...','pos'),\n", " (u'The Dark Knight is the greatest superhero movie ever!','neg'),\n", " (u'Fantastic Four should have never been made.','pos'),\n", " (u'Wes Anderson is my favorite director!','neg'),\n", " (u'Captain America 2 is pretty awesome.','neg'),\n", " (u'Let\\s pretend \"Batman and Robin\" never happened..','pos'),\n", " ]\n", "testing = [\n", " (u'Superman was never an interesting character.','pos'),\n", " (u'Fantastic Mr Fox is an awesome film!','neg'),\n", " (u'Dragonball Evolution is simply terrible!!','pos')\n", " ]" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "import textblob.classifiers\n", "\n", "classifier = textblob.classifiers.NaiveBayesClassifier(training)\n", "print classifier.accuracy(testing)\n", "classifier.show_informative_features(3)" ], "language": "python", "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "1.0\n", "Most Informative Features\n", " contains(is) = True neg : pos = 2.9 : 1.0\n", " contains(terrible) = False neg : pos = 1.7 : 1.0\n", " contains(never) = False neg : pos = 1.7 : 1.0\n" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "blob = textblob.TextBlob(u'the weather is terrible!', classifier=classifier)\n", "print blob.classify()" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "pos\n" ] } ], "prompt_number": 18 }, { "cell_type": "markdown", "metadata": { "slideshow": { 
"slide_type": "slide" } }, "source": [ "## Blobber" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "### Factory Method" ] }, { "cell_type": "code", "collapsed": false, "input": [ "np_extractor = textblob.np_extractors.ConllExtractor()\n", "pos_tagger = textblob.taggers.NLTKTagger()\n", "tokenizer = nltk.tokenize.PunktWordTokenizer()\n", "analyzer = textblob.sentiments.NaiveBayesAnalyzer()" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "blob = textblob.TextBlob(u'Dog goes woof. Cat goes meow. Bird goes tweet. And mouse goes squeek.',\n", " np_extractor=np_extractor,\n", " pos_tagger=pos_tagger,\n", " tokenizer=tokenizer,\n", " analyzer=analyzer)\n", "# do something with blob\n", "\n", "blob2 = textblob.TextBlob(u'Cow goes moo. Frog goes croak. And the elephant goes toot.',\n", " np_extractor=np_extractor,\n", " pos_tagger=pos_tagger,\n", " tokenizer=tokenizer,\n", " analyzer=analyzer)\n", "# do something with blob2\n", "\n", "blob3 = textblob.TextBlob(u'Ducks say quack. And fish go blub. 
And the seal goes ow ow ow ow ow.',\n", " np_extractor=np_extractor,\n", " pos_tagger=pos_tagger,\n", " tokenizer=tokenizer,\n", " analyzer=analyzer)\n", "# do something with blob3" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "blobber = textblob.Blobber(\n", " np_extractor=np_extractor,\n", " pos_tagger=pos_tagger,\n", " tokenizer=tokenizer,\n", " analyzer=analyzer)\n", "\n", "blob = blobber(u'But there\\'s one sound that no one knows: What does the fox say?')\n", "\n", "print blob\n", "print blob.np_extractor\n", "print blob.pos_tagger\n", "print blob.tokenizer\n", "print blob.analyzer" ], "language": "python", "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "But there's one sound that no one knows: What does the fox say?\n", "\n", "\n", "\n", "\n" ] } ], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "print map(blobber, ['Ring-ding-ding-ding-dingeringeding!',\n", " 'Wa-pa-pa-pa-pa-pa-pow!',\n", " 'Hatee-hatee-hatee-ho!',\n", " 'Joff-tchoff-tchoffo-tchoffo-tchoff!'])" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "[TextBlob(\"Ring-ding-ding-ding-dingeringeding!\"), TextBlob(\"Wa-pa-pa-pa-pa-pa-pow!\"), TextBlob(\"Hatee-hatee-hatee-ho!\"), TextBlob(\"Joff-tchoff-tchoffo-tchoffo-tchoff!\")]\n" ] } ], "prompt_number": 22 }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "### Classifier Comparison" ] }, { "cell_type": "code", "collapsed": false, "input": [ "training = [\n", " (u'tobey maguire is a terrible spiderman.','pos'),\n", " (u'a terrible Javert (Russell Crowe) ruined Les Miserables for me...','pos'),\n", " (u'The Dark Knight is the greatest superhero movie 
ever!','neg'),\n", " (u'Fantastic Four should have never been made.','pos'),\n", " (u'Wes Anderson is my favorite director!','neg'),\n", " (u'Captain America 2 is pretty awesome.','neg'),\n", " (u'Let\\'s pretend \"Batman and Robin\" never happened..','pos'),\n", " ]\n", "texts = [\n", " u'Superman was never an interesting character.',\n", " u'Fantastic Mr Fox is an awesome film!',\n", " u'Dragonball Evolution is simply terrible!!'\n", " ]" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "import textblob.classifiers\n", "\n", "nb_classifier = textblob.classifiers.NaiveBayesClassifier(training)\n", "dt_classifier = textblob.classifiers.DecisionTreeClassifier(training)" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "for text in texts:\n", " nb_class = textblob.TextBlob(text, classifier=nb_classifier).classify()\n", " dt_class = textblob.TextBlob(text, classifier=dt_classifier).classify()\n", " print nb_class == dt_class" ], "language": "python", "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "True\n", "True\n", "False\n" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "nb_blobber = textblob.Blobber(classifier=nb_classifier)\n", "dt_blobber = textblob.Blobber(classifier=dt_classifier)\n", "\n", "for text in texts:\n", " print nb_blobber(text).classify() == dt_blobber(text).classify()" ], "language": "python", "metadata": { "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "True\n", "True\n", "False\n" ] } ], "prompt_number": 26 }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Future Blob\n", "\n", "+ Looking for contributors and maintainers\n", "+ http://textblob.readthedocs.org/en/dev/contributing.html#extension-development" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Fin\n", "\n", "+ @whosbacon\n", "+ http://whosbacon.com\n", "+ http://nbviewer.ipython.org/github/whosken/textblob-intro/blob/master/TextBlob.ipynb" ] } ], "metadata": {} } ] }