{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Show a passage\n", "\n", "accented+vocalized, vocalized, consonantal" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0.00s This is LAF-Fabric 4.8.3\n", "API reference: http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html\n", "Feature doc: https://shebanq.ancient-data.org/static/docs/featuredoc/texts/welcome.html\n", "\n" ] } ], "source": [ "import sys,os,re\n", "import collections\n", "from IPython.display import HTML, display_pretty, display_html\n", "\n", "import laf\n", "from laf.fabric import LafFabric\n", "from etcbc.lib import Transcription\n", "from etcbc.preprocess import prep\n", "\n", "fabric = LafFabric()\n", "\n", "source = 'etcbc'\n", "versions = ('4b', '4c')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0.00s LOADING API: please wait ... \n", " 0.00s USING main: etcbc4b DATA COMPILED AT: 2015-11-02T15-08-56\n", " 2.28s LOGFILE=/Users/dirk/laf/laf-fabric-output/etcbc4b/passage/__log__passage.txt\n", " 2.29s INFO: LOADING PREPARED data: please wait ... \n", " 2.29s prep prep: G.node_sort\n", " 2.34s prep prep: G.node_sort_inv\n", " 2.80s prep prep: L.node_up\n", " 5.37s prep prep: L.node_down\n", " 10s prep prep: V.verses\n", " 10s prep prep: V.books_la\n", " 10s ETCBC reference: http://laf-fabric.readthedocs.org/en/latest/texts/ETCBC-reference.html\n", " 11s INFO: LOADED PREPARED data\n", " 11s INFO: DATA LOADED FROM SOURCE etcbc4b AND ANNOX FOR TASK passage AT 2016-11-09T19-10-06\n", " 0.00s LOADING API: please wait ... \n", " 0.00s BEGIN COMPILE m: etcbc4c\n", " 0.00s LOGFILE=/Users/dirk/laf/laf-fabric-data/etcbc4c/bin/__log__compile__.txt\n", " 0.00s PARSING ANNOTATION FILES\n", " 0.08s INFO: parsing etcbc4c_regions.xml\n", " 5.37s INFO: parsing etcbc4c_monads.xml\n", " 28s INFO: parsing etcbc4c_lingo.xml\n", " 1m 22s INFO: parsing etcbc4c_sections.xml\n", " 1m 28s INFO: parsing etcbc4c_monads.lex.xml\n", " 3m 55s INFO: parsing etcbc4c_lingo.c.xml\n", " 4m 15s INFO: parsing etcbc4c_lingo.p.xml\n", " 4m 34s INFO: parsing etcbc4c_lingo.pa.xml\n", " 4m 59s INFO: parsing etcbc4c_lingo.s.xml\n", " 5m 02s INFO: parsing etcbc4c_lingo.sp.xml\n", " 5m 06s INFO: END PARSING\n", " 800724 good regions and 0 faulty ones\n", " 1436894 linked nodes and 0 unlinked ones\n", " 2225333 good edges and 0 faulty ones\n", " 5029799 good annots and 0 faulty ones\n", " 34171309 good features and 0 faulty ones\n", " 9492750 distinct xml identifiers\n", "\n", " 5m 06s MODELING RESULT FILES\n", " 5m 06s INFO: XML-IDS (inverse mapping)\n", " 5m 09s INFO: NODES AND REGIONS\n", " 5m 09s INFO: NODES ANCHOR BOUNDARIES\n", " 5m 29s INFO: NODES SORTING BY REGIONS\n", " 5m 31s INFO: NODES EVENTS\n", " 5m 53s INFO: CONNECTIVITY\n", " 5m 56s WRITING RESULT FILES for m: etcbc4c\n", " 6m 30s END COMPILE m: etcbc4c\n", " 6m 30s USING main: etcbc4c DATA COMPILED AT: 2016-11-09T19-16-37\n", " 6m 34s INFO: LOADING PREPARED data: please wait ... \n", " 6m 34s prep prep: G.node_sort\n", " 6m 34s PREPARING prep: G.node_sort\n", " 6m 34s LOADING API with EXTRAs: please wait ... \n", " 6m 34s USING main: etcbc4c DATA COMPILED AT: 2016-11-09T19-16-37\n", " 6m 36s NORMAL: DATA LOADED FROM SOURCE etcbc4c AND ANNOX FOR TASK passage AT 2016-11-09T19-16-42\n", " 6m 36s SORTING nodes ...\n", " 7m 21s WRITING prep: G.node_sort\n", " 7m 21s prep prep: G.node_sort_inv\n", " 7m 21s PREPARING prep: G.node_sort_inv\n", " 7m 21s SORTING nodes (inv) ...\n", " 7m 22s WRITING prep: G.node_sort_inv\n", " 7m 22s prep prep: L.node_up\n", " 7m 22s PREPARING prep: L.node_up\n", " 7m 22s LOADING API with EXTRAs: please wait ... \n", " 7m 22s USING main: etcbc4c DATA COMPILED AT: 2016-11-09T19-16-37\n", " 7m 22s NORMAL: DATA LOADED FROM SOURCE etcbc4c AND ANNOX FOR TASK passage AT 2016-11-09T19-17-28\n", " 7m 22s Objects contained in books\n", " 7m 33s Objects contained in chapters\n", " 7m 42s Objects contained in verses\n", " 7m 52s Objects contained in half_verses\n", " 8m 01s Objects contained in sentences\n", " 8m 10s Objects contained in sentence_atoms\n", " 8m 19s Objects contained in clauses\n", " 8m 27s Objects contained in clause_atoms\n", " 8m 34s Objects contained in phrases\n", " 8m 41s Objects contained in phrase_atoms\n", " 8m 46s Objects contained in subphrases\n", " 8m 49s Objects contained in words\n", " 8m 52s WRITING prep: L.node_up\n", " 8m 55s prep prep: L.node_down\n", " 8m 55s PREPARING prep: L.node_down\n", " 8m 55s WRITING prep: L.node_down\n", " 8m 59s prep prep: V.verses\n", " 8m 59s PREPARING prep: V.verses\n", " 8m 59s LOADING API with EXTRAs: please wait ... \n", " 8m 59s USING main: etcbc4c DATA COMPILED AT: 2016-11-09T19-16-37\n", " 8m 59s NORMAL: DATA LOADED FROM SOURCE etcbc4c AND ANNOX FOR TASK passage AT 2016-11-09T19-19-05\n", " 8m 59s Making verse index\n", " 9m 00s Done. 23213 verses\n", " 9m 00s WRITING prep: V.verses\n", " 9m 00s prep prep: V.books_la\n", " 9m 00s PREPARING prep: V.books_la\n", " 9m 00s Listing books\n", " 9m 02s Done. 39 books\n", " 9m 02s WRITING prep: V.books_la\n", " 9m 02s ETCBC reference: http://laf-fabric.readthedocs.org/en/latest/texts/ETCBC-reference.html\n", " 9m 02s INFO: LOADED PREPARED data\n", " 9m 02s INFO: DATA LOADED FROM SOURCE etcbc4c AND ANNOX FOR TASK passage AT 2016-11-09T19-19-08\n" ] } ], "source": [ "FF = {}\n", "MSG = {}\n", "LL = {}\n", "for version in ('4b', '4c'):\n", " API = fabric.load(source+version, '--', 'passage', {\n", " \"xmlids\": {\"node\": False, \"edge\": False},\n", " \"features\": ('''\n", " otype\n", " g_cons g_word g_cons_utf8 g_word_utf8 g_word trailer_utf8\n", " book chapter verse label\n", " ''',''),\n", " \"prepare\": prep(select={'L'}),\n", " \"primary\": False,\n", " }, verbose='NORMAL')\n", " FF[version] = API['F']\n", " MSG[version] = API['msg']\n", " LL[version] = API['L']\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 6.65s 4b: Making a mapping between a passage specification and a verse node\n", " 8.34s Done\n", " 8.34s 4c: Making a mapping between a passage specification and a verse node\n", " 9.62s Done\n" ] } ], "source": [ "verses = {}\n", "for version in versions:\n", " msg = MSG[version]\n", " F = FF[version]\n", " msg(\"{}: Making a mapping between a passage specification and a verse node\".format(version))\n", " versesv = collections.defaultdict(lambda: collections.defaultdict(lambda: {}))\n", " for vn in F.otype.s('verse'):\n", " bk = F.book.v(vn)\n", " ch = int(F.chapter.v(vn))\n", " vs = int(F.verse.v(vn))\n", " versesv[bk][ch][vs] = vn\n", " verses[version] = versesv\n", " msg('Done')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "HTML('''\n", "\n", "''')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "accent_pat = re.compile('[*0-9]')\n", "tr = Transcription()\n", "\n", "def print_verse(bk, ch, vs, vowels=True, accents=True):\n", " rows = {}\n", " for version in versions:\n", " F = FF[version]\n", " L = LL[version]\n", " label = '{} {}:{}'.format(bk, ch, vs)\n", " vn = verses[version][bk][ch][vs]\n", " treps = []\n", " trepes = []\n", " for w in L.d('word', vn):\n", " if not vowels:\n", " trep = '{}{}'.format(F.g_cons_utf8.v(w), F.trailer_utf8.v(w))\n", " trepe = F.g_cons.v(w)\n", " else:\n", " trep = '{}{}'.format(F.g_word_utf8.v(w), F.trailer_utf8.v(w))\n", " trepe = F.g_word.v(w)\n", " if not accents:\n", " trep = Transcription.to_hebrew(accent_pat.sub('', tr.from_hebrew(trep)))\n", " treps.append(trep)\n", " trepes.append(trepe)\n", " text = ''.join(treps)\n", " texte = ' '.join(trepes)\n", " rows[version] = '''\n", " {}{}\n", " {}{}\n", " '''.format(version, text, label, texte)\n", " return '''\n", "\n", " {}\n", "
'''.format('\\n'.join(rows[version] for version in versions))\n", "\n", "pc = lambda bk, ch, vs: print_verse(bk, ch, vs, vowels=False, accents=False) # no vowels, no accents\n", "pv = lambda bk, ch, vs: print_verse(bk, ch, vs, vowels=True, accents=False) # vowels, no accents\n", "pa = lambda bk, ch, vs: print_verse(bk, ch, vs, vowels=True, accents=True) # vowels and accents" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "
4bויהי באמרם אליו יום ויום ולא שׁמע אליהם ויגידו להמן לראות היעמדו דברי מרדכי כי־הגיד להם אשׁר־הוא יהודי׃\n", "
Esther 3:4W JHJ B >MRM >LJW JWM W JWM W L> CM< >LJHM W JGJDW L HMN L R>WT H JCR HW> JHWDJ
4cויהי באמרם אליו יום ויום ולא שׁמע אליהם ויגידו להמן לראות היעמדו דברי מרדכי כי־הגיד להם אשׁר־הוא יהודי׃
Esther 3:4W JHJ B >MRM >LJW JWM W JWM W L> CM< >LJHM W JGJDW L HMN L R>WT H JCR HW> JHWDJ
" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "HTML(pc('Esther', 3, 4))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "
4bוַיְהִי באמרם אֵלָיו יֹום וָיֹום וְלֹא שָׁמַע אֲלֵיהֶם וַיַּגִּידוּ לְהָמָן לִרְאֹות הֲיַעַמְדוּ דִּבְרֵי מָרְדֳּכַי כִּי־הִגִּיד לָהֶם אֲשֶׁר־הוּא יְהוּדִי\n", "
Esther 3:4WA- J:HI81J *B- *>MRM >;L@JW03 JO74WM W@- JO80WM W:- LO71> C@MA73< >:AL;JHE92M WA- J.AG.I74JDW. L:- H@M@81N LI- R:>OWT03 H:A- JA75:ACER& H71W.> J:HW.DI75J00
4cוַיְהִי באמרם אֵלָיו יֹום וָיֹום וְלֹא שָׁמַע אֲלֵיהֶם וַיַּגִּידוּ לְהָמָן לִרְאֹות הֲיַעַמְדוּ דִּבְרֵי מָרְדֳּכַי כִּי־הִגִּיד לָהֶם אֲשֶׁר־הוּא יְהוּדִי
Esther 3:4WA- J:HI81J *B- *>MRM >;L@JW03 JO74WM W@- JO80WM W:- LO71> C@MA73< >:AL;JHE92M WA- J.AG.I74JDW. L:- H@M@81N LI- R:>OWT03 H:A- JA45:ACER H71W.> J:HW.DI75J
" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "HTML(pv('Esther', 3, 4))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", "\n", " \n", " \n", " \n", "
4bוַיְהִ֗י ֯ב֯אמרם אֵלָיו֙ יֹ֣ום וָיֹ֔ום וְלֹ֥א שָׁמַ֖ע אֲלֵיהֶ֑ם וַיַּגִּ֣ידוּ לְהָמָ֗ן לִרְאֹות֙ הֲיַֽעַמְדוּ֙ דִּבְרֵ֣י מָרְדֳּכַ֔י כִּֽי־הִגִּ֥יד לָהֶ֖ם אֲשֶׁר־ה֥וּא יְהוּדִֽי׃\n", "
Esther 3:4WA- J:HI81J *B- *>MRM >;L@JW03 JO74WM W@- JO80WM W:- LO71> C@MA73< >:AL;JHE92M WA- J.AG.I74JDW. L:- H@M@81N LI- R:>OWT03 H:A- JA75:ACER& H71W.> J:HW.DI75J00
4cוַיְהִ֗י באמרם אֵלָיו֙ יֹ֣ום וָיֹ֔ום וְלֹ֥א שָׁמַ֖ע אֲלֵיהֶ֑ם וַיַּגִּ֣ידוּ לְהָמָ֗ן לִרְאֹות֙ הֲיַֽעַמְדוּ֙ דִּבְרֵ֣י מָרְדֳּכַ֔י כִּֽי־הִגִּ֥יד לָהֶ֖ם אֲשֶׁר־ה֥וּא יְהוּדִֽי׃
Esther 3:4WA- J:HI81J *B- *>MRM >;L@JW03 JO74WM W@- JO80WM W:- LO71> C@MA73< >:AL;JHE92M WA- J.AG.I74JDW. L:- H@M@81N LI- R:>OWT03 H:A- JA45:ACER H71W.> J:HW.DI75J
" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "HTML(pa('Esther', 3, 4))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 0 }