{ "metadata": { "name": "", "signature": "sha256:0c245a0dbe84628ce57a80bb3c2993ade98489e9edd1b3207dc215e26662e847" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "\n", "" ] }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Trees - for CALAP data (Syriac)" ] }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Starting LAF-Fabric" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import sys\n", "import collections\n", "import random\n", "%load_ext autoreload\n", "%autoreload 2\n", "import laf\n", "from laf.fabric import LafFabric\n", "from etcbc.preprocess import prepare\n", "from etcbc.lib import Transcription, monad_set\n", "from etcbc.trees import Tree\n", "fabric = LafFabric()\n", "tr = Transcription()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s This is LAF-Fabric 4.3.3\n", "http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html\n" ] } ], "prompt_number": 1 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Declaring the features" ] }, { "cell_type": "code", "collapsed": false, "input": [ "API=fabric.load('calap', '--', 'trees', {\n", " \"xmlids\": {\"node\": False, \"edge\": False},\n", " \"features\": ('''\n", " oid otype monads\n", " surface_consonants\n", " psp\n", " phrase_type\n", " verse_label\n", " ''',''),\n", " \"prepare\": prepare,\n", "}, verbose='NORMAL')\n", "exec(fabric.localnames.format(var='fabric'))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s LOADING API: please wait ... 
\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s INFO: USING DATA COMPILED AT: 2014-06-27T12-29-20\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 0.33s LOGFILE=/Users/dirk/laf-fabric-output/calap/trees/__log__trees.txt\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 0.38s INFO: DATA LOADED FROM SOURCE calap AND ANNOX -- FOR TASK trees AT 2014-07-15T16-05-01\n" ] } ], "prompt_number": 2 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Configuration" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here we define the formatting of the trees.\n", "\n", "Relevant nodes\n", "--------------\n", "Not all nodes will be shown in the output.\n", "The nodes that are shown have abbreviated names.\n", "Nodes with ``True`` will be shown, nodes with ``False`` will be suppressed.\n", "\n", "Suppressing a node leaves its children in place. Another way of looking at it: a suppressed node is replaced by its children.\n", "\n", "Exception: when a node is visited twice, the second visit refers to the tree built by the first visit.\n", "In that case, we do not suppress the node.\n", "\n", "**N.B.** It turns out that the ``-atom`` nodes are never visited twice.\n", "\n", "pos_table\n", "---------\n", "We abbreviate the part-of-speech tags. \n", "We include the pos-info by inserting a unary node right above each word." 
] }, { "cell_type": "code", "collapsed": false, "input": [
 "# Node types that occur in the trees, each with the tag it gets in the output.\n",
 "# The empty tag marks words: get_tag() tests for it.\n",
 "type_info = (\n",
 "    (\"word\", ''),\n",
 "    (\"phrase_atom\", 'U'),\n",
 "    (\"phrase\", 'P'),\n",
 "    (\"clause_atom\", 'S'),\n",
 ")\n",
 "type_table = dict(type_info)\n",
 "type_order = [otype for (otype, tag) in type_info]\n",
 "\n",
 "# Abbreviations for the values of the part-of-speech feature (psp).\n",
 "pos_table = {\n",
 "    'adjective': 'aj',\n",
 "    'adverb': 'av',\n",
 "    'conjunction': 'cj',\n",
 "    'interjection': 'ij',\n",
 "    'interrogative': 'ir',\n",
 "    'negative': 'ng',\n",
 "    'noun': 'n',\n",
 "    'preposition': 'pp',\n",
 "    'pronoun': 'pr',\n",
 "    'verb': 'vb',\n",
 "}\n",
 "\n",
 "# Object types handed to Tree; the first is used as root type, the last as leaf type.\n",
 "tree_types = ('clause_atom', 'phrase', 'phrase_atom', 'word')\n",
 "(root_type, leaf_type, clause_type) = (tree_types[0], tree_types[-1], 'clause_atom')\n",
 "tree = Tree(\n",
 "    API, otypes=tree_types, clause_type=None, ccr_feature=None,\n",
 "    pt_feature='phrase_type', pos_feature='psp', mother_feature=None,\n",
 ")\n",
 "tree.restructure_clauses(None)\n",
 "results = tree.relations()\n",
 "parent = results['rparent']\n",
 "sisters = results['sisters']\n",
 "children = results['rchildren']\n",
 "elder_sister = results['elder_sister']\n",
 "\n",
 "# Map every root node to the label of the last verse node that precedes it.\n",
 "# cur_verse is initialised here, so that a root node before the first verse gets\n",
 "# None (instead of a NameError, or a stale label left over from an earlier run).\n",
 "root_verse = {}\n",
 "cur_verse = None\n",
 "for n in NN():\n",
 "    otype = F.otype.v(n)\n",
 "    if otype == 'verse':\n",
 "        cur_verse = F.verse_label.v(n)\n",
 "    elif otype == root_type:\n",
 "        root_verse[n] = cur_verse\n",
 "\n",
 "msg(\"Ready for processing\")"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s LOADING API with EXTRAs: please wait ... 
\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s INFO: USING DATA COMPILED AT: 2014-06-27T12-29-20\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 0.10s INFO: DATA LOADED FROM SOURCE calap AND ANNOX -- FOR TASK trees AT 2014-07-15T16-05-08\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s Start computing parent and children relations for objects of type clause_atom, phrase, phrase_atom, word\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 1.29s 100000 nodes\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 1.82s 141611 nodes: 130181 have parents and 87691 have children\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 1.82s Restructuring clauses: deep copying tree relations\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 2.86s Ready for processing\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [
 "def get_tag(node):\n",
 "    \"\"\"Return (tag, pos, monad, text, is_word) for node; passed to tree.write_tree.\n",
 "\n",
 "    Only words (type-table tag '') get a pos, a monad and a text; for all other\n",
 "    nodes these three are None. Phrases are tagged with their phrase_type.\n",
 "    \"\"\"\n",
 "    tag = type_table[F.otype.v(node)]\n",
 "    # Decide word-hood on the type-table tag, before a phrase tag is replaced by\n",
 "    # its phrase_type: an empty phrase_type must not turn a phrase into a word.\n",
 "    is_word = tag == ''\n",
 "    if tag == 'P':\n",
 "        tag = F.phrase_type.v(node)\n",
 "    if not is_word:\n",
 "        return (tag, None, None, None, False)\n",
 "    pos = pos_table[F.psp.v(node)]\n",
 "    monad = int(F.monads.v(node))\n",
 "    text = '\"{}\"'.format(tr.to_syriac(F.surface_consonants.v(node)))\n",
 "    return (tag, pos, monad, text, True)\n",
 "\n",
 "def passage_roots(verse_label):\n",
 "    \"\"\"Return the root_type nodes encountered within the verse labelled verse_label.\n",
 "\n",
 "    Collecting starts at the first verse node with that label and ends at the\n",
 "    next verse node with a different label.\n",
 "    \"\"\"\n",
 "    sought = []\n",
 "    inside = False\n",
 "    for n in NN():\n",
 "        otype = F.otype.v(n)\n",
 "        if otype == 'verse':\n",
 "            if F.verse_label.v(n) == verse_label:\n",
 "                inside = True\n",
 "            elif inside:\n",
 "                # Past the requested verse: collecting is over, so stop here\n",
 "                # instead of walking all remaining nodes.\n",
 "                break\n",
 "        if inside and otype == root_type:\n",
 "            sought.append(n)\n",
 "    return sought\n",
 "\n",
 "def showcases(cases, ofile):\n",
 "    \"\"\"Write a report on the trees rooted at the nodes in cases to the file ofile.\n",
 "\n",
 "    cases maps root nodes to a value that is copied into the report.\n",
 "    For each node both the 'e' and the 'r' tree are written, each followed by\n",
 "    its depth and its debug representation.\n",
 "    \"\"\"\n",
 "    out = outfile(ofile)\n",
 "    # try/finally: the file is closed even if writing one of the trees fails.\n",
 "    try:\n",
 "        for snode in cases:\n",
 "            out.write(\"\\n====================\\n{}\\n{}\\n{} bhs_id={} laf_node={}:\\n\".format(\n",
 "                root_verse[snode], cases[snode], root_type, F.oid.v(snode), snode,\n",
 "            ))\n",
 "            for kind in ('e', 'r'):\n",
 "                out.write(\"\\nTree based on monad embedding {}\\n\\n\".format(\n",
 "                    \"only\" if kind == 'e' else \" and mother+clause_constituent relation\"\n",
 "                ))\n",
 "                (tree_rep, words_rep, bmonad) = tree.write_tree(snode, kind, get_tag, rev=False, leafnumbers=False)\n",
 "                out.write(\"{}\\n\\n{}\\n\".format(words_rep, tree_rep))\n",
 "                out.write(\"\\nDepth={}\\n\".format(tree.depth(snode, kind)))\n",
 "                out.write(tree.debug_write_tree(snode, kind, legenda=kind=='r'))\n",
 "    finally:\n",
 "        out.close()"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [
 "msg(\"Writing {} trees\".format(root_type))\n",
 "chunk = 10000  # a progress message is issued after every chunk trees\n",
 "verse_label = ''\n",
 "s = 0  # number of trees written so far\n",
 "trees = outfile(\"trees.txt\")\n",
 "# try/finally: the output file is closed even if writing one of the trees fails.\n",
 "try:\n",
 "    for node in NN():\n",
 "        otype = F.otype.v(node)\n",
 "        if otype == 'verse':\n",
 "            verse_label = F.verse_label.v(node)\n",
 "            continue\n",
 "        if otype != root_type:\n",
 "            continue\n",
 "        (tree_rep, words_rep, bmonad) = tree.write_tree(node, 'r', get_tag, rev=False, leafnumbers=False)\n",
 "        # The oid is looked up for root nodes only; other nodes do not need it.\n",
 "        trees.write(\"\\n#{}\\tnode={}\\toid={}\\tbmonad={}\\t{}\\n{}\\n\".format(\n",
 "            verse_label, node, F.oid.v(node), bmonad, words_rep, tree_rep,\n",
 "        ))\n",
 "        s += 1\n",
 "        if s % chunk == 0:\n",
 "            msg(\"{} trees written\".format(s))\n",
 "finally:\n",
 "    trees.close()\n",
 "msg(\"{} trees written\".format(s))"
 ], "language": "python", "metadata": {}, "outputs": [
 { "output_type": "stream", "stream": "stderr", "text": [ " 11s Writing clause_atom trees\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 18s 10000 trees written\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 19s 11411 trees written\n" ] } ], "prompt_number": 5 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Checking for sanity" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "1. How many clause_atom nodes? \n", "1. 
Does any clause_atom have a parent? \n", "1. Is every top node a clause_atom?\n", "1. Do you reach all clause_atoms if you go up from words?\n", "1. Do you reach all words if you go down from clause_atoms? " ] }, { "cell_type": "code", "collapsed": false, "input": [
 "#1\n",
 "# Count the nodes of the root type.\n",
 "msg(\"Counting {}s ...\".format(root_type))\n",
 "n_roots = len(set(NN(test=F.otype.v, value=root_type)))\n",
 "msg(\"There are {} {}s\".format(n_roots, root_type))"
 ], "language": "python", "metadata": {}, "outputs": [
 { "output_type": "stream", "stream": "stderr", "text": [ " 23s Counting clause_atoms ...\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 23s There are 11411 clause_atoms\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [
 "#2\n",
 "# Collect the root-type nodes that occur as a key in the parent relation, and list them.\n",
 "msg(\"Checking parents of {}s\".format(root_type))\n",
 "exceptions = {node for node in NN(test=F.otype.v, value=root_type) if node in parent}\n",
 "if not exceptions:\n",
 "    msg(\"No {} has a parent\".format(root_type))\n",
 "else:\n",
 "    msg(\"{} {}s have a parent:\".format(len(exceptions), root_type))\n",
 "    for n in sorted(exceptions):\n",
 "        p = parent[n]\n",
 "        msg(\"{} {} [{}] has {} parent {} [{}]\".format(\n",
 "            root_type, n, F.monads.v(n),\n",
 "            F.otype.v(p), p, F.monads.v(p)\n",
 "        ))"
 ], "language": "python", "metadata": {}, "outputs": [
 { "output_type": "stream", "stream": "stderr", "text": [ " 25s Checking parents of clause_atoms\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 25s 13 clause_atoms have a parent:\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 53963 [257] has phrase parent 65481 [257,261,268]\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 54180 [1259-1260] has phrase parent 66141 [1252,1259-1260]\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 61168 [36003-36005] has phrase parent 87851 [35993,35995-36005]\n" ] }, { "output_type": "stream", 
"stream": "stderr", "text": [ " 25s clause_atom 61185 [36057-36058] has phrase parent 87888 [36053,36057-36058]\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 61204 [36115-36117] has phrase parent 87929 [36110-36111,36115-36117]\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 61254 [36317-36319] has phrase parent 88069 [36312-36313,36317-36319]\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 61449 [37049] has phrase parent 88611 [37049,37056]\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 63071 [43736] has phrase parent 93449 [43736,43744-43745,43750-43751]\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 64144 [48344] has phrase parent 96589 [48344,48346-48347]\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 64146 [48346-48347] has phrase parent 96589 [48344,48346-48347]\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 64178 [48471-48472] has phrase parent 96675 [48465-48466,48471-48472]\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 64655 [50599] has phrase parent 98073 [50592-50594,50599]\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s clause_atom 64664 [50627-50628] has phrase parent 98093 [50620-50621,50627-50628]\n" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "#3 (again a check on #1)\n", "msg(\"Checking the types of root nodes ...\")\n", "exceptions = collections.defaultdict(lambda: [])\n", "sn = 0\n", "for node in NN():\n", " otype = F.otype.v(node)\n", " if otype not in type_table: continue\n", " if otype == root_type: sn += 1\n", " if node not in parent and node not in elder_sister and otype != root_type: \n", " exceptions[otype].append(node)\n", "if len(exceptions) == 0:\n", " msg(\"All top nodes are 
{}s\".format(root_type))\n",
 "else:\n",
 "    msg(\"Top nodes which are not {}s:\".format(root_type))\n",
 "    for t in sorted(exceptions):\n",
 "        msg(\"{}: {}x\".format(t, len(exceptions[t])), withtime=False)\n",
 "msg(\"{} {}s seen\".format(sn, root_type))\n",
 "\n",
 "for c in exceptions[clause_type]:\n",
 "    (s, st) = tree.get_root(c, 'e')\n",
 "    v = root_verse[s]\n",
 "    msg(\"{}={}, {}={}={}, verse={}\".format(clause_type, c, root_type, st, s, v), withtime=False)"
 ], "language": "python", "metadata": {}, "outputs": [
 { "output_type": "stream", "stream": "stderr", "text": [ " 29s Checking the types of root nodes ...\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 29s Top nodes which are not clause_atoms:\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "phrase: 32x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 29s 11411 clause_atoms seen\n" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [
 "#4, 5\n",
 "def get_top(kind, rel, rela, multi):\n",
 "    \"\"\"Walk rel and rela from all nodes of type kind and report where the walk ends.\n",
 "\n",
 "    kind:      object type of the start nodes.\n",
 "    rel, rela: mappings from a node to its successor(s); both are followed.\n",
 "    multi:     True if the mappings hold collections of nodes,\n",
 "               False if they hold a single node.\n",
 "\n",
 "    A node that is a key in neither mapping is an end point of the walk.\n",
 "    The number of end points is reported per object type through msg();\n",
 "    nothing is returned.\n",
 "    \"\"\"\n",
 "    start_nodes = set(NN(test=F.otype.v, value=kind))\n",
 "    msg(\"Starting from {} nodes ...\".format(kind))\n",
 "    seen = set()\n",
 "    end_nodes = set()\n",
 "    frontier = start_nodes\n",
 "    while frontier:\n",
 "        next_frontier = set()\n",
 "        for node in frontier:\n",
 "            if node in seen:\n",
 "                continue\n",
 "            seen.add(node)\n",
 "            is_end = True\n",
 "            # Both relations are treated alike, so handle them in one loop.\n",
 "            for relation in (rel, rela):\n",
 "                if node in relation:\n",
 "                    is_end = False\n",
 "                    if multi:\n",
 "                        next_frontier.update(relation[node])\n",
 "                    else:\n",
 "                        next_frontier.add(relation[node])\n",
 "            if is_end:\n",
 "                end_nodes.add(node)\n",
 "        frontier = next_frontier\n",
 "    end_types = collections.Counter(F.otype.v(node) for node in end_nodes)\n",
 "    # Sorted, so that the order of the report lines does not depend on dict ordering.\n",
 "    for otype in sorted(end_types):\n",
 "        msg(\"From {} {} nodes reached {} {} nodes\".format(\n",
 "            len(start_nodes), kind, end_types[otype], otype,\n",
 "        ), withtime=False)\n",
 "\n",
 "msg(\"Embedding trees\")\n",
 "get_top(leaf_type, tree.eparent, {}, False)\n",
 "get_top(root_type, tree.echildren, {}, True)\n",
 "msg(\"Restructd trees\")\n",
 "get_top(leaf_type, tree.rparent, tree.elder_sister, False)\n",
 "get_top(root_type, tree.rchildren, tree.sisters, True)\n",
 "msg(\"Done\")"
 ], "language": "python", "metadata": {}, "outputs": [
 { "output_type": "stream", "stream": "stderr", "text": [ " 32s Embedding trees\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 32s Starting from word nodes ...\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "From 53920 word nodes reached 11398 clause_atom nodes\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "From 53920 word nodes reached 32 phrase nodes\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 33s Starting from clause_atom nodes ...\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "From 11411 clause_atom nodes reached 53864 word nodes\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 33s Restructd trees\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 33s Starting from word nodes ...\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "From 53920 word nodes reached 11398 clause_atom nodes\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "From 53920 word nodes reached 32 phrase nodes\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 33s Starting from clause_atom nodes ...\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "From 11411 clause_atom nodes reached 53864 word nodes\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 33s Done\n" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [
 "#7\n",
 "# The relation is bound to parent_of, not to parent: the name parent is set in\n",
 "# the configuration cell and read by the checks #2 and #3, so rebinding it here\n",
 "# would change what those cells test when they are run again.\n",
 "msg(\"Which types embed which types and how often? ...\")\n",
 "for kind in ('e', 'r'):\n",
 "    parent_of = tree.eparent if kind == 'e' else tree.rparent\n",
 "    kindrep = 'embedding' if kind == 'e' else 'restructd'\n",
 "    # Count the (child type, parent type) pairs over all parent links.\n",
 "    plinked_types = collections.Counter(\n",
 "        (F.otype.v(c), F.otype.v(p)) for (c, p) in parent_of.items()\n",
 "    )\n",
 "    msg(\"Found {} parent ({}) links between types\".format(len(parent_of), kindrep))\n",
 "    for lt in sorted(plinked_types):\n",
 "        msg(\"{}: {}x\".format(lt, plinked_types[lt]), withtime=False)"
 ], "language": "python", "metadata": {}, "outputs": [
 { "output_type": "stream", "stream": "stderr", "text": [ " 35s Which types embed which types and how often? ...\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 35s Found 130181 parent (embedding) links between types\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('clause_atom', 'phrase'): 13x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('phrase', 'clause_atom'): 34863x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('phrase_atom', 'clause_atom'): 53x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('phrase_atom', 'phrase'): 41332x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('word', 'clause_atom'): 1x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('word', 'phrase_atom'): 53919x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 35s Found 130181 parent (restructd) links between types\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('clause_atom', 'phrase'): 13x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('phrase', 'clause_atom'): 34863x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('phrase_atom', 'clause_atom'): 53x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('phrase_atom', 'phrase'): 41332x\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ "('word', 'clause_atom'): 1x\n" ] 
}, { "output_type": "stream", "stream": "stderr", "text": [ "('word', 'phrase_atom'): 53919x\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [
 "#11\n",
 "# Depth statistics per tree kind, over all trees (total_depth, max_depth) and\n",
 "# over the trees whose 'e' and 'r' depths differ (rtotal_depth, rmax_depth).\n",
 "msg(\"Computing depths\")\n",
 "kinds = ('e', 'r')\n",
 "ntrees = 0\n",
 "rntrees = 0\n",
 "total_depth = dict.fromkeys(kinds, 0)\n",
 "rtotal_depth = dict.fromkeys(kinds, 0)\n",
 "max_depth = dict.fromkeys(kinds, 0)\n",
 "rmax_depth = dict.fromkeys(kinds, 0)\n",
 "for node in NN(test=F.otype.v, value=root_type):\n",
 "    ntrees += 1\n",
 "    this_depth = {kind: tree.depth(node, kind) for kind in kinds}\n",
 "    different = this_depth['e'] != this_depth['r']\n",
 "    if different:\n",
 "        rntrees += 1\n",
 "    for kind in kinds:\n",
 "        depth = this_depth[kind]\n",
 "        max_depth[kind] = max(max_depth[kind], depth)\n",
 "        total_depth[kind] += depth\n",
 "        if different:\n",
 "            rmax_depth[kind] = max(rmax_depth[kind], depth)\n",
 "            rtotal_depth[kind] += depth\n",
 "\n",
 "msg(\"{} trees seen, of which in {} cases restructuring makes a difference in depth\".format(ntrees, rntrees))\n",
 "# The averages are only reported when there is something to divide by.\n",
 "if ntrees:\n",
 "    msg(\"Embedding trees: max depth = {:>2}, average depth = {:.2g}\".format(max_depth['e'], total_depth['e'] / ntrees))\n",
 "    msg(\"Restructd trees: max depth = {:>2}, average depth = {:.2g}\".format(max_depth['r'], total_depth['r'] / ntrees))\n",
 "if rntrees:\n",
 "    msg(\"Statistics for cases where restructuring makes a difference:\")\n",
 "    msg(\"Embedding trees: max depth = {:>2}, average depth = {:.2g}\".format(rmax_depth['e'], rtotal_depth['e'] / rntrees))\n",
 "    msg(\"Restructd trees: max depth = {:>2}, average depth = {:.2g}\".format(rmax_depth['r'], rtotal_depth['r'] / rntrees))"
 ], "language": "python", "metadata": {}, "outputs": [
 { "output_type": "stream", "stream": "stderr", "text": [ " 37s Computing depths\n" ] },
 { "output_type": "stream", "stream": "stderr", "text": [ " 38s 11411 trees seen, of which in 0 cases restructuring makes a difference in depth\n" ] }, { 
"output_type": "stream", "stream": "stderr", "text": [ " 38s Embedding trees: max depth = 3, average depth = 3\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 38s Restructd trees: max depth = 3, average depth = 3\n" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "close()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ " 40s Results directory:\n", "/Users/dirk/laf-fabric-output/calap/trees\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ "\n", "__log__trees.txt 3475 Tue Jul 15 18:05:49 2014\n", "trees.txt 1624637 Tue Jul 15 18:05:27 2014\n" ] } ], "prompt_number": 12 }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Preview" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here are the first lines of the output." ] }, { "cell_type": "code", "collapsed": false, "input": [ "!head -n 25 {my_file('trees.txt')}" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\r\n", "#1R 1,1\tnode=53920\toid=2856\tbmonad=1\t0 1 2 3\r\n", "(S(CP(U(cj \"\u0718\")))(NP(U(n \"\u0721\u0720\u071f\u0710\"))(U(n \"\u0715\u0718\u071d\u0715\")))(VP(U(vb \"\u0723\u0710\u0712\"))))\r\n", "\r\n", "#1R 1,1\tnode=53921\toid=2857\tbmonad=5\t0 1 2 3\r\n", "(S(CP(U(cj \"\u0718\")))(VP(U(vb \"\u0725\u0720\")))(PP(U(pp \"\u0712\")(n \"\u072b\u0722\u071d\u0710\"))))\r\n", "\r\n", "#1R 1,1\tnode=53922\toid=2858\tbmonad=9\t0 1 2 3 4 5\r\n", "(S(CP(U(cj \"\u0718\")))(VP(U(vb \"\u0721\u071f\u0723\u071d\u0722\")))(VP(U(vb \"\u0717\u0718\u0718\")))(PP(U(pp \"\u0720\u0717\")))(PP(U(pp \"\u0712\")(n \"\u0720\u0712\u0718\u072b\u0710\"))))\r\n", "\r\n", "#1R 1,1\tnode=53923\toid=2859\tbmonad=15\t0 1 2\r\n", "(S(CP(U(cj \"\u0718\")))(NegP(U(ng \"\u0720\u0710\")))(VP(U(vb \"\u072b\u071a\u0722\"))))\r\n", "\r\n", "#1R 1,2\tnode=53924\toid=2860\tbmonad=18\t0 1 2 3\r\n", "(S(CP(U(cj 
\"\u0718\")))(VP(U(vb \"\u0710\u0721\u072a\u0718\")))(PP(U(pp \"\u0720\u0717\")))(NP(U(n \"\u0725\u0712\u0715\u0718\u0717\u071d\"))))\r\n", "\r\n", "#1R 1,2\tnode=53925\toid=2861\tbmonad=22\t0 1 2 3 4 5 6 7 8\r\n", "(S(InjP(U(ij \"\u0717\u0710\")))(NP(U(n \"\u0725\u0712\u0715\u071d\u071f\"))(U(pp \"\u0729\u0715\u0721\u071d\u071f\")))(VP(U(vb \"\u0722\u0712\u0725\u0718\u0722\")))(PP(U(pp \"\u0720\")(n \"\u0721\u072a\u0722\"))(U(n \"\u0721\u0720\u071f\u0710\")))(NP(U(aj \"\u0725\u0720\u071d\u0721\u072c\u0710\"))(U(aj \"\u0712\u072c\u0718\u0720\u072c\u0710\"))))\r\n", "\r\n", "#1R 1,2\tnode=53926\toid=2862\tbmonad=31\t0 1 2 3\r\n", "(S(CP(U(cj \"\u0718\")))(VP(U(vb \"\u072c\u0729\u0718\u0721\")))(PP(U(pp \"\u0729\u0715\u0721\")(n \"\u0721\u0720\u071f\u0710\"))))\r\n", "\r\n", "#1R 1,2\tnode=53927\toid=2863\tbmonad=35\t0 1 2 3\r\n", "(S(CP(U(cj \"\u0718\")))(VP(U(vb \"\u072c\u0717\u0718\u0710\")))(PP(U(pp \"\u0720\u0717\")))(NP(U(aj \"\u0721\u072b\u0721\u072b\u0722\u071d\u072c\u0710\"))))\r\n", "\r\n" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }