{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "\n", "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Paragraphs\n", "\n", "Paragraph numbers are stored in an extra annotation package called `para`.\n", "That packages makes available the feature `pargr`.\n", "\n", "In this notebook we show how you could use that feature, together with features from an other annotation package, `lexicon`, of which we use the `gloss` feature." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0.00s This is LAF-Fabric 4.8.2\n", "API reference: http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html\n", "Feature doc: https://shebanq.ancient-data.org/static/docs/featuredoc/texts/welcome.html\n", "\n", " 3m 01s END\n" ] } ], "source": [ "import sys, os\n", "import collections\n", "\n", "import laf\n", "from laf.fabric import LafFabric\n", "from etcbc.preprocess import prepare\n", "fabric = LafFabric()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Loading the feature data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0.00s LOADING API: please wait ... \n", " 0.01s DETAIL: COMPILING m: etcbc4b: UP TO DATE\n", " 0.01s USING main: etcbc4b DATA COMPILED AT: 2015-11-02T15-08-56\n", " 0.01s DETAIL: COMPILING a: lexicon: UP TO DATE\n", " 0.01s USING annox: lexicon DATA COMPILED AT: 2016-07-08T14-32-54\n", " 0.01s DETAIL: COMPILING a: para: UP TO DATE\n", " 0.02s USING annox: para DATA COMPILED AT: 2016-07-08T14-38-37\n", " 0.03s DETAIL: load main: G.node_anchor_min\n", " 0.12s DETAIL: load main: G.node_anchor_max\n", " 0.19s DETAIL: load main: G.node_sort\n", " 0.25s DETAIL: load main: G.node_sort_inv\n", " 0.74s DETAIL: load main: G.edges_from\n", " 0.82s DETAIL: load main: G.edges_to\n", " 0.90s DETAIL: load main: F.etcbc4_db_otype [node] \n", " 1.72s DETAIL: load annox lexicon: F.etcbc4_lex_gloss [node] \n", " 2.00s DETAIL: load annox para: F.etcbc4_px_pargr [node] \n", " 2.05s LOGFILE=/Users/dirk/laf/laf-fabric-output/etcbc4b/paragraphs/__log__paragraphs.txt\n", " 2.10s INFO: LOADING PREPARED data: please wait ... \n", " 2.10s prep prep: G.node_sort\n", " 2.17s prep prep: G.node_sort_inv\n", " 3.04s prep prep: L.node_up\n", " 6.59s prep prep: L.node_down\n", " 13s prep prep: V.verses\n", " 13s prep prep: V.books_la\n", " 13s ETCBC reference: http://laf-fabric.readthedocs.org/en/latest/texts/ETCBC-reference.html\n", " 15s INFO: LOADED PREPARED data\n", " 15s INFO: DATA LOADED FROM SOURCE etcbc4b AND ANNOX lexicon, para FOR TASK paragraphs AT 2016-09-22T10-17-59\n" ] } ], "source": [ "version = '4b'\n", "API = fabric.load('etcbc{}'.format(version), 'lexicon,para', 'paragraphs', {\n", " \"xmlids\": {\"node\": False, \"edge\": False},\n", " \"features\": ('''\n", " otype \n", " gloss\n", " pargr\n", " ''',\n", " '''\n", " '''),\n", " \"prepare\": prepare,\n", " \"primary\": False,\n", "}, verbose='DETAIL')\n", "exec(fabric.localnames.format(var='fabric'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Result\n", "\n", "We collect all clause atoms, and produce the paragraph numbers and glosses of them, but only the first 100 of them." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1 in beginning create god(s) the heavens and the earth\n", " 1 and the earth be emptiness and emptiness\n", " 1 and darkness upon face primeval ocean\n", " 1 and wind god(s) shake upon face the water\n", " 1.1 and say god(s)\n", " 1.1.1 be light\n", " 1.1.2 and be light\n", " 1.1.3 and see god(s) the light\n", " 1.1.3 that be good\n", " 1.1.4 and separate god(s) interval the light and interval the darkness\n", " 1.1.5 and call god(s) to the light day\n", " 1.1.5 and to the darkness call night\n", " 1.1.5.1 and be evening\n", " 1.1.5.2 and be morning\n", " 1.1.5.2 day one\n", " 1.2 and say god(s)\n", " 1.2.1 be firmament in midst the water\n", " 1.2.1 and be separate interval water to water\n", " 1.2.2 and make god(s) the firmament\n", " 1.2.2 and separate interval the water\n", " 1.2.2 from under part to the firmament\n", " 1.2.2 and interval the water\n", " 1.2.2 from upon to the firmament\n", " 1.2.2 and be thus\n", " 1.2.2.1 and call god(s) to the firmament heavens\n", " 1.2.2.1.1 and be evening\n", " 1.2.2.1.2 and be morning\n", " 1.2.2.1.2 day second\n", " 1.3 and say god(s)\n", " 1.3.1 collect the water from under part the heavens to place one\n", " 1.3.1 and see the dry land\n", " 1.3 and be thus\n", " 1.3.2 and call god(s) to the dry land earth\n", " 1.3.2 and to collection the water call sea\n", " 1.3.3 and see god(s)\n", " 1.3.3 that be good\n", " 1.4 and say god(s)\n", " 1.4.1 grow green the earth young grass herb\n", " 1.4.1 sow seed\n", " 1.4.1 tree fruit\n", " 1.4.1 make fruit to kind\n", " 1.4.1 seed in\n", " 1.4.1 upon the earth\n", " 1.4 and be thus\n", " 1.4.2 and go out the earth young grass herb\n", " 1.4.2 sow seed to kind\n", " 1.4.2 and tree\n", " 1.4.2 make fruit\n", " 1.4.2 seed in\n", " 1.4.2 to kind\n", " 1.4.3 and see god(s)\n", " 1.4.3 that be good\n", " 1.4.3.1 and be evening\n", " 1.4.3.2 and be morning\n", " 1.4.3.2 day third\n", " 1.5 and say god(s)\n", " 1.5.1 be lamp in firmament the heavens\n", " 1.5.1 to separate interval the day and interval the night\n", " 1.5.1 and be to sign and to appointment and to day and year\n", " 1.5.1 and be to lamp in firmament the heavens\n", " 1.5.1 to be light upon the earth\n", " 1.5 and be thus\n", " 1.5.2 and make god(s) two the lamp the great\n", " 1.5.2 the lamp the great to dominion the day\n", " 1.5.2 and the lamp the small to dominion the night\n", " 1.5.2 and the star\n", " 1.5.2.1 and give god(s) in firmament the heavens\n", " 1.5.2.1 to be light upon the earth\n", " 1.5.2.1 and to rule in the day and in the night\n", " 1.5.2.1 and to separate interval the light and interval the darkness\n", " 1.5.3 and see god(s)\n", " 1.5.3 that be good\n", " 1.5.3.1 and be evening\n", " 1.5.3.2 and be morning\n", " 1.5.3.2 day fourth\n", " 1.6 and say god(s)\n", " 1.6.1 swarm the water swarming creatures soul alive\n", " 1.6.1 and birds fly upon the earth upon face firmament the heavens\n", " 1.6.2 and create god(s) the sea-monster the great and whole soul\n", " 1.6.2 the alive\n", " 1.6.2 the creep\n", " 1.6.2 swarm the water\n", " 1.6.2 to kind\n", " 1.6.2 and whole birds wing to kind\n", " 1.6.3 and see god(s)\n", " 1.6.3 that be good\n", " 1.6.3.1 and bless god(s)\n", " 1.6.3.1 to say\n", " 1.6.3.1.1 be fertile\n", " 1.6.3.1.1 and be many\n", " 1.6.3.1.1 and be full the water in the sea\n", " 1.6.3.1.1 and the birds be many in the earth\n", " 1.6.3.2 and be evening\n", " 1.6.3.3 and be morning\n", " 1.6.3.3 day fifth\n", " 1.7 and say god(s)\n", " 1.7.1 go out the earth soul alive to kind\n", " 1.7.1 cattle and creeping animals and wild animal earth to kind\n", " 1.7 and be thus\n", " 1.7.2 and make god(s) wild animal the earth to kind\n" ] } ], "source": [ "limit = 100\n", "n = 0\n", "for ca in F.otype.s('clause_atom'):\n", " n += 1\n", " if n > limit: break\n", " inf('{:>10} {}'.format(\n", " F.pargr.v(ca),\n", " ' '.join(F.gloss.v(w) for w in L.d('word', ca)),\n", " ), withtime=False)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 0 }