{
 "metadata": {
  "name": "",
  "signature": "sha256:aba4639823d41a59be27fb05e556172d6bf24db95c4ab2299c7798b70a0f11cb"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Visualizing a CKY parser with IPythonBlocks"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import nltk\n",
      "from ipythonblocks import BlockGrid, colors\n",
      "\n",
      "def print_chart(chart, num_of_tabs=1):\n",
      "    \"\"\"\n",
      "    Print a parse chart, one tab-separated line per chart row.\n",
      "\n",
      "    Optional, home-made helper for inspecting a chart. Every cell (a set)\n",
      "    is converted to a list purely for nicer display, and each printed\n",
      "    line is indented with ``num_of_tabs`` tab characters.\n",
      "    \"\"\"\n",
      "    tabstring = '\\t' * num_of_tabs\n",
      "    for row in chart:\n",
      "        # A single-argument print(...) produces the same output on\n",
      "        # Python 2 and Python 3.\n",
      "        print(tabstring + \"\\t\".join([str(list(element)) for element in row]))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "words = [\"I\", \"shot\", \"an\", \"elephant\", \"in\", \"my\", \"pajamas\"]\n",
      "n = len(words)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# nltk.parse_cfg() exists only in NLTK 2.x; NLTK 3 replaced it with\n",
      "# nltk.CFG.fromstring(). Use whichever the installed version provides.\n",
      "parse_cfg = getattr(nltk, 'parse_cfg', None) or nltk.CFG.fromstring\n",
      "grammar = parse_cfg(\"\"\"\n",
      " S -> NP VP\n",
      " PP -> P NP\n",
      " NP -> Det N | 'I' | NP PP\n",
      " VP -> V NP | VP PP\n",
      " Det -> 'an' | 'my'\n",
      " N -> 'elephant' | 'pajamas'\n",
      " V -> 'shot'\n",
      " P -> 'in'\n",
      " \"\"\")"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Build an empty chart\n",
      "chart = []\n",
      "for i in range(n+1):\n",
      "    # Create an empty row\n",
      "    row = []\n",
      "    # Append n+1 empty sets (set())\n",
      "    for j in range(n+1):\n",
      "        row.append(set())\n",
      "    # Append the new row to the chart\n",
      "    chart.append(row)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
"collapsed": false, "input": [ "grid = BlockGrid(n+1, n+1)\n", "known_terminals = set()\n", "\n", "# Terminalproduktionen\n", "# Iteriere ueber alle Positionen des Eingabesatzes\n", "for i in range(n):\n", " # Iteriere ueber alle Regeln der Grammatik, die das Wort words[i] als rechte Seite haben\n", " # z.B. words[4] = 'elephant' und N -> 'elephant'\n", " print \"productions for terminal '{}':\".format(words[i])\n", " for prod in grammar.productions(rhs=words[i]):\n", " print prod\n", " # Fuege in die Zelle fuer die Spanne von i bis i+1 die linke Regel (z.B. N) ein\n", " print \"\\t{0} added to chart[{1}][{2}]\\n\".format(prod.lhs(), i, i+1)\n", " grid[i, i+1] = colors['White']\n", " grid.flash(display_time=1.0)\n", " chart[i][i+1].add(prod.lhs())\n", " known_terminals.add((i, i+1))\n", " \n", "grid.show()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "