{
"metadata": {
"name": "",
"signature": "sha256:0c245a0dbe84628ce57a80bb3c2993ade98489e9edd1b3207dc215e26662e847"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"\n",
"\n",
""
]
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Trees - for CALAP data (Syriac)"
]
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Starting LAF-Fabric"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import sys\n",
"import collections\n",
"import random\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"import laf\n",
"from laf.fabric import LafFabric\n",
"from etcbc.preprocess import prepare\n",
"from etcbc.lib import Transcription, monad_set\n",
"from etcbc.trees import Tree\n",
"fabric = LafFabric()\n",
"tr = Transcription()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 0.00s This is LAF-Fabric 4.3.3\n",
"http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html\n"
]
}
],
"prompt_number": 1
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Declaring the features"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"API=fabric.load('calap', '--', 'trees', {\n",
" \"xmlids\": {\"node\": False, \"edge\": False},\n",
" \"features\": ('''\n",
" oid otype monads\n",
" surface_consonants\n",
" psp\n",
" phrase_type\n",
" verse_label\n",
" ''',''),\n",
" \"prepare\": prepare,\n",
"}, verbose='NORMAL')\n",
"exec(fabric.localnames.format(var='fabric'))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 0.00s LOADING API: please wait ... \n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 0.00s INFO: USING DATA COMPILED AT: 2014-06-27T12-29-20\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 0.33s LOGFILE=/Users/dirk/laf-fabric-output/calap/trees/__log__trees.txt\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 0.38s INFO: DATA LOADED FROM SOURCE calap AND ANNOX -- FOR TASK trees AT 2014-07-15T16-05-01\n"
]
}
],
"prompt_number": 2
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Configuration"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here we define the formatting of the trees.\n",
"\n",
"Relevant nodes\n",
"--------------\n",
"Not all nodes will be shown in the output.\n",
"The nodes that are shown, have abbreviated names.\n",
"Nodes with ``True`` will be shown, nodes with ``False`` will be suppressed.\n",
"\n",
"Suppressing a node leaves its children in place. Another way of looking at it, is: we replace a node by its children.\n",
"\n",
"Exception: when a node is visited twice, the second visit refers to the tree built by the first visit.\n",
"In that case, we do not suppress the node.\n",
"\n",
"**N.B.** It turns out that the ``-atom`` nodes are never visited twice.\n",
"\n",
"pos_table\n",
"---------\n",
"We abbreviate the part-of-speech tags. \n",
"We include the pos-info by inserting a unary node right above each word."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"type_info = (\n",
" (\"word\", ''),\n",
" (\"phrase_atom\", 'U'),\n",
" (\"phrase\", 'P'),\n",
" (\"clause_atom\", 'S'),\n",
")\n",
"type_table = dict(t for t in type_info)\n",
"type_order = [t[0] for t in type_info]\n",
"pos_table = {\n",
" 'adjective': 'aj',\n",
" 'adverb': 'av',\n",
" 'conjunction': 'cj',\n",
" 'interjection': 'ij',\n",
" 'interrogative': 'ir',\n",
" 'negative': 'ng',\n",
" 'noun': 'n',\n",
" 'preposition': 'pp',\n",
" 'pronoun': 'pr',\n",
" 'verb': 'vb',\n",
"}\n",
"\n",
"tree_types = ('clause_atom', 'phrase', 'phrase_atom', 'word')\n",
"(root_type, leaf_type, clause_type) = (tree_types[0], tree_types[-1], 'clause_atom')\n",
"tree = Tree(API, otypes=tree_types, clause_type=None, ccr_feature=None, pt_feature='phrase_type', pos_feature='psp', mother_feature=None)\n",
"tree.restructure_clauses(None)\n",
"results = tree.relations()\n",
"parent = results['rparent']\n",
"sisters = results['sisters']\n",
"children = results['rchildren']\n",
"elder_sister = results['elder_sister']\n",
"root_verse = {}\n",
"\n",
"for n in NN():\n",
" otype = F.otype.v(n)\n",
" if otype == 'verse': cur_verse = F.verse_label.v(n)\n",
" elif otype == root_type: root_verse[n] = cur_verse\n",
"\n",
"msg(\"Ready for processing\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 0.00s LOADING API with EXTRAs: please wait ... \n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 0.00s INFO: USING DATA COMPILED AT: 2014-06-27T12-29-20\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 0.10s INFO: DATA LOADED FROM SOURCE calap AND ANNOX -- FOR TASK trees AT 2014-07-15T16-05-08\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 0.00s Start computing parent and children relations for objects of type clause_atom, phrase, phrase_atom, word\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 1.29s 100000 nodes\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 1.82s 141611 nodes: 130181 have parents and 87691 have children\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 1.82s Restructuring clauses: deep copying tree relations\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 2.86s Ready for processing\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def get_tag(node):\n",
" otype = F.otype.v(node)\n",
" tag = type_table[otype]\n",
" if tag == 'P': tag = F.phrase_type.v(node)\n",
" is_word = tag == ''\n",
" pos = pos_table[F.psp.v(node)] if is_word else None\n",
" monad = int(F.monads.v(node)) if is_word else None\n",
" text = '\"{}\"'.format(tr.to_syriac(F.surface_consonants.v(node))) if is_word else None\n",
" return (tag, pos, monad, text, is_word)\n",
"\n",
"def passage_roots(verse_label):\n",
" sought = []\n",
" grab = -1\n",
" for n in NN():\n",
" if grab == 1: continue\n",
" otype = F.otype.v(n)\n",
" if otype == 'verse': \n",
" check = F.verse_label.v(n) == verse_label\n",
" if check: grab = 0\n",
" elif grab == 0: grab = 1\n",
" if grab == 0 and otype == root_type: sought.append(n)\n",
" return sought\n",
"\n",
"def showcases(cases, ofile):\n",
" out = outfile(ofile)\n",
" for snode in cases:\n",
" out.write(\"\\n====================\\n{}\\n{}\\n{} bhs_id={} laf_node={}:\\n\".format(\n",
" root_verse[snode], cases[snode], root_type, F.oid.v(snode), snode,\n",
" ))\n",
" for kind in ('e', 'r'):\n",
" out.write(\"\\nTree based on monad embedding {}\\n\\n\".format(\n",
" \"only\" if kind == 'e' else \" and mother+clause_constituent relation\"\n",
" ))\n",
" (tree_rep, words_rep, bmonad) = tree.write_tree(snode, kind, get_tag, rev=False, leafnumbers=False)\n",
" out.write(\"{}\\n\\n{}\\n\".format(words_rep, tree_rep))\n",
" out.write(\"\\nDepth={}\\n\".format(tree.depth(snode, kind)))\n",
" out.write(tree.debug_write_tree(snode, kind, legenda=kind=='r'))\n",
" out.close()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 4
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"msg(\"Writing {} trees\".format(root_type))\n",
"trees = outfile(\"trees.txt\")\n",
"verse_label = ''\n",
"s = 0\n",
"chunk = 10000\n",
"sc = 0\n",
"for node in NN():\n",
" otype = F.otype.v(node)\n",
" oid = F.oid.v(node)\n",
" if otype == 'verse':\n",
" verse_label = F.verse_label.v(node)\n",
" continue\n",
" if otype != root_type: continue\n",
" (tree_rep, words_rep, bmonad) = tree.write_tree(node, 'r', get_tag, rev=False, leafnumbers=False)\n",
" trees.write(\"\\n#{}\\tnode={}\\toid={}\\tbmonad={}\\t{}\\n{}\\n\".format(\n",
" verse_label, node, oid, bmonad, words_rep, tree_rep,\n",
" ))\n",
" s += 1\n",
" sc += 1\n",
" if sc == chunk:\n",
" msg(\"{} trees written\".format(s))\n",
" sc = 0\n",
"trees.close() \n",
"msg(\"{} trees written\".format(s))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 11s Writing clause_atom trees\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 18s 10000 trees written\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 19s 11411 trees written\n"
]
}
],
"prompt_number": 5
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Checking for sanity"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. How many clause_atom nodes? \n",
"1. Does any clause_atom have a parent? \n",
"1. Is every top node a clause_atom?\n",
"1. Do you reach all clause_atoms if you go up from words?\n",
"1. Do you reach all words if you go down from clause_atoms? "
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#1\n",
"msg(\"Counting {}s ...\".format(root_type))\n",
"msg(\"There are {} {}s\".format(len(set(NN(test=F.otype.v, value=root_type))), root_type))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 23s Counting clause_atoms ...\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 23s There are 11411 clause_atoms\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#2\n",
"msg(\"Checking parents of {}s\".format(root_type))\n",
"exceptions = set()\n",
"for node in NN(test=F.otype.v, value=root_type):\n",
" if node in parent: exceptions.add(node)\n",
"if len(exceptions) == 0:\n",
" msg(\"No {} has a parent\".format(root_type))\n",
"else:\n",
" msg(\"{} {}s have a parent:\".format(len(exceptions), root_type))\n",
" for n in sorted(exceptions):\n",
" p = parent[n]\n",
" msg(\"{} {} [{}] has {} parent {} [{}]\".format(\n",
" root_type, n, F.monads.v(n), \n",
" F.otype.v(p), p, F.monads.v(p)\n",
" ))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s Checking parents of clause_atoms\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s 13 clause_atoms have a parent:\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 53963 [257] has phrase parent 65481 [257,261,268]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 54180 [1259-1260] has phrase parent 66141 [1252,1259-1260]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 61168 [36003-36005] has phrase parent 87851 [35993,35995-36005]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 61185 [36057-36058] has phrase parent 87888 [36053,36057-36058]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 61204 [36115-36117] has phrase parent 87929 [36110-36111,36115-36117]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 61254 [36317-36319] has phrase parent 88069 [36312-36313,36317-36319]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 61449 [37049] has phrase parent 88611 [37049,37056]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 63071 [43736] has phrase parent 93449 [43736,43744-43745,43750-43751]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 64144 [48344] has phrase parent 96589 [48344,48346-48347]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 64146 [48346-48347] has phrase parent 96589 [48344,48346-48347]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 64178 [48471-48472] has phrase parent 96675 [48465-48466,48471-48472]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 64655 [50599] has phrase parent 98073 [50592-50594,50599]\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 25s clause_atom 64664 [50627-50628] has phrase parent 98093 [50620-50621,50627-50628]\n"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#3 (again a check on #1)\n",
"msg(\"Checking the types of root nodes ...\")\n",
"exceptions = collections.defaultdict(lambda: [])\n",
"sn = 0\n",
"for node in NN():\n",
" otype = F.otype.v(node)\n",
" if otype not in type_table: continue\n",
" if otype == root_type: sn += 1\n",
" if node not in parent and node not in elder_sister and otype != root_type: \n",
" exceptions[otype].append(node)\n",
"if len(exceptions) == 0:\n",
" msg(\"All top nodes are {}s\".format(root_type))\n",
"else:\n",
" msg(\"Top nodes which are not {}s:\".format(root_type))\n",
" for t in sorted(exceptions):\n",
" msg(\"{}: {}x\".format(t, len(exceptions[t])), withtime=False)\n",
"msg(\"{} {}s seen\".format(sn, root_type))\n",
"\n",
"for c in exceptions[clause_type]:\n",
" (s, st) = tree.get_root(c, 'e')\n",
" v = root_verse[s]\n",
" msg(\"{}={}, {}={}={}, verse={}\".format(clause_type, c, root_type, st, s, v), withtime=False)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 29s Checking the types of root nodes ...\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 29s Top nodes which are not clause_atoms:\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"phrase: 32x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 29s 11411 clause_atoms seen\n"
]
}
],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#4, 5\n",
"def get_top(kind, rel, rela, multi):\n",
" seen = set()\n",
" top_nodes = set()\n",
" start_nodes = set(NN(test=F.otype.v, value=kind))\n",
" next_nodes = start_nodes\n",
" msg(\"Starting from {} nodes ...\".format(kind))\n",
" while len(next_nodes):\n",
" new_next_nodes = set()\n",
" for node in next_nodes:\n",
" if node in seen: continue\n",
" seen.add(node)\n",
" is_top = True\n",
" if node in rel: \n",
" is_top = False\n",
" if multi:\n",
" for c in rel[node]: new_next_nodes.add(c)\n",
" else:\n",
" new_next_nodes.add(rel[node])\n",
" if node in rela: \n",
" is_top = False\n",
" if multi:\n",
" for c in rela[node]: new_next_nodes.add(c)\n",
" else:\n",
" new_next_nodes.add(rela[node])\n",
" if is_top: top_nodes.add(node)\n",
" next_nodes = new_next_nodes\n",
" top_types = collections.defaultdict(lambda: 0)\n",
" for t in top_nodes:\n",
" top_types[F.otype.v(t)] += 1\n",
" for t in top_types:\n",
" msg(\"From {} {} nodes reached {} {} nodes\".format(len(start_nodes), kind, top_types[t], t), withtime=False)\n",
"\n",
"msg(\"Embedding trees\")\n",
"get_top(leaf_type, tree.eparent, {}, False)\n",
"get_top(root_type, tree.echildren, {}, True)\n",
"msg(\"Restructd trees\")\n",
"get_top(leaf_type, tree.rparent, tree.elder_sister, False)\n",
"get_top(root_type, tree.rchildren, tree.sisters, True)\n",
"msg(\"Done\")"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 32s Embedding trees\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 32s Starting from word nodes ...\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"From 53920 word nodes reached 11398 clause_atom nodes\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"From 53920 word nodes reached 32 phrase nodes\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 33s Starting from clause_atom nodes ...\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"From 11411 clause_atom nodes reached 53864 word nodes\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 33s Restructd trees\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 33s Starting from word nodes ...\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"From 53920 word nodes reached 11398 clause_atom nodes\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"From 53920 word nodes reached 32 phrase nodes\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 33s Starting from clause_atom nodes ...\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"From 11411 clause_atom nodes reached 53864 word nodes\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 33s Done\n"
]
}
],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#7\n",
"msg(\"Which types embed which types and how often? ...\")\n",
"for kind in ('e', 'r'):\n",
" plinked_types = collections.defaultdict(lambda: 0)\n",
" parent = tree.eparent if kind == 'e' else tree.rparent\n",
" kindrep = 'embedding' if kind == 'e' else 'restructd'\n",
" for (c, p) in parent.items():\n",
" plinked_types[(F.otype.v(c), F.otype.v(p))] += 1\n",
" msg(\"Found {} parent ({}) links between types\".format(len(parent), kindrep))\n",
" for lt in sorted(plinked_types):\n",
" msg(\"{}: {}x\".format(lt, plinked_types[lt]), withtime=False)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 35s Which types embed which types and how often? ...\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 35s Found 130181 parent (embedding) links between types\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('clause_atom', 'phrase'): 13x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('phrase', 'clause_atom'): 34863x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('phrase_atom', 'clause_atom'): 53x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('phrase_atom', 'phrase'): 41332x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('word', 'clause_atom'): 1x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('word', 'phrase_atom'): 53919x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 35s Found 130181 parent (restructd) links between types\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('clause_atom', 'phrase'): 13x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('phrase', 'clause_atom'): 34863x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('phrase_atom', 'clause_atom'): 53x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('phrase_atom', 'phrase'): 41332x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('word', 'clause_atom'): 1x\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"('word', 'phrase_atom'): 53919x\n"
]
}
],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#11\n",
"msg(\"Computing depths\")\n",
"ntrees = 0\n",
"rntrees = 0\n",
"total_depth = {'e': 0, 'r': 0}\n",
"rtotal_depth = {'e': 0, 'r': 0}\n",
"max_depth = {'e': 0, 'r':0}\n",
"rmax_depth = {'e': 0, 'r': 0}\n",
"for node in NN(test=F.otype.v, value=root_type):\n",
" ntrees += 1\n",
" this_depth = {}\n",
" for kind in ('e', 'r'):\n",
" this_depth[kind] = tree.depth(node, kind)\n",
" different = this_depth['e'] != this_depth['r']\n",
" if different: rntrees += 1\n",
" for kind in ('e', 'r'):\n",
" if this_depth[kind] > max_depth[kind]: max_depth[kind] = this_depth[kind]\n",
" total_depth[kind] += this_depth[kind]\n",
" if different:\n",
" if this_depth[kind] > rmax_depth[kind]: rmax_depth[kind] = this_depth[kind]\n",
" rtotal_depth[kind] += this_depth[kind]\n",
" \n",
"msg(\"{} trees seen, of which in {} cases restructuring makes a difference in depth\".format(ntrees, rntrees))\n",
"if ntrees > 0:\n",
" msg(\"Embedding trees: max depth = {:>2}, average depth = {:.2g}\".format(max_depth['e'], total_depth['e'] / ntrees))\n",
" msg(\"Restructd trees: max depth = {:>2}, average depth = {:.2g}\".format(max_depth['r'], total_depth['r'] / ntrees))\n",
"if rntrees > 0:\n",
" msg(\"Statistics for cases where restructuring makes a difference:\")\n",
" msg(\"Embedding trees: max depth = {:>2}, average depth = {:.2g}\".format(rmax_depth['e'], rtotal_depth['e'] / rntrees))\n",
" msg(\"Restructd trees: max depth = {:>2}, average depth = {:.2g}\".format(rmax_depth['r'], rtotal_depth['r'] / rntrees))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 37s Computing depths\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 38s 11411 trees seen, of which in 0 cases restructuring makes a difference in depth\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 38s Embedding trees: max depth = 3, average depth = 3\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 38s Restructd trees: max depth = 3, average depth = 3\n"
]
}
],
"prompt_number": 11
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"close()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stderr",
"text": [
" 40s Results directory:\n",
"/Users/dirk/laf-fabric-output/calap/trees\n"
]
},
{
"output_type": "stream",
"stream": "stderr",
"text": [
"\n",
"__log__trees.txt 3475 Tue Jul 15 18:05:49 2014\n",
"trees.txt 1624637 Tue Jul 15 18:05:27 2014\n"
]
}
],
"prompt_number": 12
},
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Preview"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here are the first lines of the output."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"!head -n 25 {my_file('trees.txt')}"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\r\n",
"#1R 1,1\tnode=53920\toid=2856\tbmonad=1\t0 1 2 3\r\n",
"(S(CP(U(cj \"\u0718\")))(NP(U(n \"\u0721\u0720\u071f\u0710\"))(U(n \"\u0715\u0718\u071d\u0715\")))(VP(U(vb \"\u0723\u0710\u0712\"))))\r\n",
"\r\n",
"#1R 1,1\tnode=53921\toid=2857\tbmonad=5\t0 1 2 3\r\n",
"(S(CP(U(cj \"\u0718\")))(VP(U(vb \"\u0725\u0720\")))(PP(U(pp \"\u0712\")(n \"\u072b\u0722\u071d\u0710\"))))\r\n",
"\r\n",
"#1R 1,1\tnode=53922\toid=2858\tbmonad=9\t0 1 2 3 4 5\r\n",
"(S(CP(U(cj \"\u0718\")))(VP(U(vb \"\u0721\u071f\u0723\u071d\u0722\")))(VP(U(vb \"\u0717\u0718\u0718\")))(PP(U(pp \"\u0720\u0717\")))(PP(U(pp \"\u0712\")(n \"\u0720\u0712\u0718\u072b\u0710\"))))\r\n",
"\r\n",
"#1R 1,1\tnode=53923\toid=2859\tbmonad=15\t0 1 2\r\n",
"(S(CP(U(cj \"\u0718\")))(NegP(U(ng \"\u0720\u0710\")))(VP(U(vb \"\u072b\u071a\u0722\"))))\r\n",
"\r\n",
"#1R 1,2\tnode=53924\toid=2860\tbmonad=18\t0 1 2 3\r\n",
"(S(CP(U(cj \"\u0718\")))(VP(U(vb \"\u0710\u0721\u072a\u0718\")))(PP(U(pp \"\u0720\u0717\")))(NP(U(n \"\u0725\u0712\u0715\u0718\u0717\u071d\"))))\r\n",
"\r\n",
"#1R 1,2\tnode=53925\toid=2861\tbmonad=22\t0 1 2 3 4 5 6 7 8\r\n",
"(S(InjP(U(ij \"\u0717\u0710\")))(NP(U(n \"\u0725\u0712\u0715\u071d\u071f\"))(U(pp \"\u0729\u0715\u0721\u071d\u071f\")))(VP(U(vb \"\u0722\u0712\u0725\u0718\u0722\")))(PP(U(pp \"\u0720\")(n \"\u0721\u072a\u0722\"))(U(n \"\u0721\u0720\u071f\u0710\")))(NP(U(aj \"\u0725\u0720\u071d\u0721\u072c\u0710\"))(U(aj \"\u0712\u072c\u0718\u0720\u072c\u0710\"))))\r\n",
"\r\n",
"#1R 1,2\tnode=53926\toid=2862\tbmonad=31\t0 1 2 3\r\n",
"(S(CP(U(cj \"\u0718\")))(VP(U(vb \"\u072c\u0729\u0718\u0721\")))(PP(U(pp \"\u0729\u0715\u0721\")(n \"\u0721\u0720\u071f\u0710\"))))\r\n",
"\r\n",
"#1R 1,2\tnode=53927\toid=2863\tbmonad=35\t0 1 2 3\r\n",
"(S(CP(U(cj \"\u0718\")))(VP(U(vb \"\u072c\u0717\u0718\u0710\")))(PP(U(pp \"\u0720\u0717\")))(NP(U(aj \"\u0721\u072b\u0721\u072b\u0722\u071d\u072c\u0710\"))))\r\n",
"\r\n"
]
}
],
"prompt_number": 13
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}