{ "metadata": { "name": "", "signature": "sha256:a88ec4d550390c0cd50ba8cc7b6494318a8e818a9643fc6df83c5017aa4d921d" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "\n", "\n", "\n", "" ] }, { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Comparison of the differences between the plain text of ETCBC versions 3 and 4" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import sys\n", "import collections\n", "import re\n", "import unicodedata\n", "\n", "from IPython.display import clear_output, display, HTML\n", "\n", "from laf.fabric import LafFabric\n", "fabric3 = LafFabric()\n", "fabric4 = LafFabric()\n", "\n", "from etcbc.lib import Transcription\n", "from etcbc.preprocess import prepare\n", "tr = Transcription()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s This is LAF-Fabric 4.4.1\n", "http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s This is LAF-Fabric 4.4.1\n", "http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "API3 = fabric3.load('etcbc3', '--', 'monads', {\n", " \"xmlids\": {\"node\": False, \"edge\": False},\n", " \"features\": ('''\n", " otype monads text suffix\n", " graphical_word\n", " verse_label\n", " ''',''),\n", " \"primary\": False,\n", " \"prepare\": prepare,\n", "})\n", "API4 = fabric4.load('etcbc4', '--', 'monads', {\n", " \"xmlids\": {\"node\": False, \"edge\": False},\n", " \"features\": ('''\n", " otype monads g_word_utf8 trailer_utf8\n", " g_word\n", " label\n", " ''',''),\n", " \"primary\": False,\n", " \"prepare\": prepare,\n", "})" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s LOADING API: 
please wait ... \n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 0.01s INFO: USING DATA COMPILED AT: 2014-06-27T12-21-04\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 2.66s LOGFILE=/Users/dirk/laf-fabric-output/etcbc3/monads/__log__monads.txt\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 3.18s INFO: DATA LOADED FROM SOURCE etcbc3 AND ANNOX -- FOR TASK monads AT 2014-07-16T09-47-51\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s LOADING API: please wait ... \n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 0.00s INFO: USING DATA COMPILED AT: 2014-07-14T16-45-08\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 3.17s LOGFILE=/Users/dirk/laf-fabric-output/etcbc4/monads/__log__monads.txt\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 3.88s INFO: DATA LOADED FROM SOURCE etcbc4 AND ANNOX -- FOR TASK monads AT 2014-07-16T09-47-55\n" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [
"splits = {\n",
" (3, ' JES 19,18', 21): 2,\n",
" (3, ' DAN 01,02', 11): 2,\n",
" (3, ' DAN 01,05', 25): 3,\n",
" (3, ' DAN 01,15', 2): 2,\n",
" (3, ' DAN 01,18', 3): 2,\n",
" (3, ' NEH 07,69', 2): 2,\n",
" (3, ' ICHR27,12', 15): 0,\n",
"}\n",
"\n",
"def monad_passage_index(API, vr, vlabel_name, graphical_name, text_name, trailer_name):\n",
"    \"\"\"Index the words of one text version by verse label and word number.\n",
"\n",
"    API is the mapping returned by fabric.load(); its 'NN', 'F' and 'msg'\n",
"    entries are used.  vr is the version number that keys the module-level\n",
"    splits table.  The four *_name arguments name the features holding the\n",
"    verse label, the transliterated word, the word text and the material\n",
"    trailing the word.\n",
"\n",
"    The text of every word node (text + trailer, newlines removed) is cut\n",
"    into pieces that each end just after a maqef (U+05BE) or a space; every\n",
"    piece gets the next word number within its verse.  A splits entry for\n",
"    (vr, verse label, word number) gives the offset at which the piece that\n",
"    would get that number is cut in two.\n",
"\n",
"    Returns (mp_index, np_index):\n",
"    mp_index maps (verse label, word number) to\n",
"    (node, monads value, transliteration, text piece);\n",
"    np_index maps a verse label to the tuple of its word nodes.\n",
"    \"\"\"\n",
"    mp_index = collections.OrderedDict()\n",
"    np_index = collections.OrderedDict()\n",
"\n",
"    cur_label = None\n",
"    cur_wn = None\n",
"    cur_nodes = None\n",
"    NN = API['NN']\n",
"    F = API['F']\n",
"    msg = API['msg']\n",
"    chunk = 50000  # report progress every `chunk` words\n",
"    i = 0\n",
"    ci = 0\n",
"    for n in NN():\n",
"        otype = F.otype.v(n)\n",
"        if otype == 'verse':\n",
"            # a new verse starts: store the word nodes of the previous one\n",
"            if cur_nodes is not None: np_index[cur_label] = tuple(cur_nodes)\n",
"            cur_label = F.item[vlabel_name].v(n)\n",
"            cur_nodes = []\n",
"            cur_wn = 0\n",
"        elif otype == 'word':\n",
"            i += 1\n",
"            ci += 1\n",
"            if ci == chunk:\n",
"                ci = 0\n",
"                msg(\"{} words\".format(i))\n",
"            m = F.monads.v(n)\n",
"            cur_nodes.append(n)\n",
"\n",
"            translit = F.item[graphical_name].v(n)\n",
"            text = (F.item[text_name].v(n) + F.item[trailer_name].v(n).replace('\\n',''))\n",
"\n",
"            if len(text) == 0:\n",
"                # a word without text still occupies a word number\n",
"                cur_wn += 1\n",
"                mp_index[(cur_label, cur_wn)] = (n, m, translit, '')\n",
"                continue\n",
"            start = 0\n",
"            while start < len(text):\n",
"                # a piece ends just after the next maqef or space, whichever comes\n",
"                # first, or at the end of the text when neither occurs\n",
"                s_maqef = text.find('\\u05BE', start)\n",
"                s_space = text.find(' ', start)\n",
"                s_min = min(s_maqef if s_maqef >= 0 else len(text) - 1, s_space if s_space >= 0 else len(text) - 1) + 1\n",
"                comp = text[start:s_min]\n",
"                start = s_min\n",
"                # hand out word numbers; a splits entry cuts the pending piece in two\n",
"                # and only the remainder goes on to the next word number\n",
"                rest = comp\n",
"                while rest is not None:\n",
"                    cur_wn += 1\n",
"                    spos = splits.get((vr, cur_label, cur_wn), None)\n",
"                    if spos is not None:\n",
"                        do_comp = rest[:spos]\n",
"                        rest = rest[spos:]\n",
"                    else:\n",
"                        do_comp = rest\n",
"                        rest = None\n",
"                    mp_index[(cur_label, cur_wn)] = (n, m, translit, do_comp)\n",
"    if cur_nodes is not None: np_index[cur_label] = tuple(cur_nodes)\n",
"    msg(\"{} words\".format(i))\n",
"    return (mp_index, np_index)"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "API3['msg'](\"Making index for ETCBC3\")\n", "(etcbc3, etcbc3n) = monad_passage_index(API3, 3, 'verse_label', 'graphical_word', 'text', 'suffix')\n", "API3['msg'](\"Done\")\n", "API4['msg'](\"Making index for ETCBC4\")\n", "(etcbc4, etcbc4n) = monad_passage_index(API4, 4, 'label', 'g_word', 'g_word_utf8', 'trailer_utf8')\n", "API4['msg'](\"Done\")" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stderr", "text": [ " 17s Making index for ETCBC3\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 18s 50000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 19s 100000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 20s 150000 words\n" ] }, { "output_type": "stream", "stream": "stderr", 
"text": [ " 21s 200000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 22s 250000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 23s 300000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s 350000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 26s 400000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 27s 426499 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 27s Done\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 23s Making index for ETCBC4\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 24s 50000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 25s 100000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 26s 150000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 27s 200000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 28s 250000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 29s 300000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 30s 350000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 31s 400000 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 32s 426555 words\n" ] }, { "output_type": "stream", "stream": "stderr", "text": [ " 32s Done\n" ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "show_case_init = '''\n", "'''\n", "show_case_table_init = '''\n", "'''\n", "show_case_table_final = '''
\n", "'''\n", "show_case_final = '''\n", "'''\n", "\n", "heb_css = '''\n", " text-align: right; \n", " font-family: SBL Hebrew; \n", " font-size: 32pt; \n", " height: 40pt;}\n", "'''\n", "def hebuni(heb):\n", " display(HTML('''

 {}

'''.format(heb_css, heb)))\n", "\n", "def check_dagesh_accent_vowel_pos(c, a):\n", " d = '\\u05BC'\n", " row_tpl = '' + (' {} {}' * 6) + ''\n", " vowels = [chr(n) for n in range(0x5B0, 0x5BC)]\n", " def perms(v): return (\n", " c + d + v + a, \n", " c + d + a + v,\n", " c + a + d + v, \n", " c + v + d + a, \n", " c + v + a + d, \n", " c + a + v + d,\n", " )\n", " def row(v):\n", " r = []\n", " for x in perms(v):\n", " r.append(x)\n", " r.append(unicodedata.normalize('NFKC', x))\n", " return tuple(r)\n", " \n", " rows = [row_tpl.format(*row(v)) for v in vowels]\n", " t = show_case_init + show_case_table_init + '\\n'.join(rows) + show_case_table_final + show_case_final\n", " display(HTML(t))\n", "\n", "def check_accent_vowel_pos(c, a):\n", " row_tpl = '' + (' {} {}' * 2) + ''\n", " vowels = [chr(n) for n in range(0x5B0, 0x5BC)]\n", " def perms(v): return (\n", " c + v + a, \n", " c + a + v,\n", " )\n", " def row(v):\n", " r = []\n", " for x in perms(v):\n", " r.append(x)\n", " r.append(unicodedata.normalize('NFKC', x))\n", " return tuple(r)\n", " \n", " rows = [row_tpl.format(*row(v)) for v in vowels]\n", " t = show_case_init + show_case_table_init + '\\n'.join(rows) + show_case_table_final + show_case_final\n", " display(HTML(t))\n", "\n", "\n",
"def show_range(passage, wnums):\n",
"    \"\"\"Display the etcbc4 words wnums of a passage as one stretch and save the HTML.\n",
"\n",
"    passage is a verse label and wnums an iterable of word numbers; together\n",
"    they form the keys looked up in the etcbc4 index.  The rendered HTML is\n",
"    shown in the notebook and also written to a file obtained from\n",
"    API4['outfile'].\n",
"\n",
"    Returns 'ERROR' (after printing a message) when a word is not in the\n",
"    index, otherwise None.\n",
"    \"\"\"\n",
"    import html  # local import: only needed to escape the transliteration\n",
"\n",
"    wnums = list(wnums)  # iterated twice below, so accept one-shot iterables too\n",
"    gw = ''\n",
"    heb = ''\n",
"    ms = []\n",
"    ns = []\n",
"    for wnum in wnums:\n",
"        if (passage, wnum) not in etcbc4:\n",
"            print(\"No word {} in passage {} in etcbc4\".format(wnum, passage))\n",
"            return 'ERROR'\n",
"        # index entries are (node, monad, transliteration, text), in that order\n",
"        (node, monad, translit, text) = etcbc4[(passage, wnum)]\n",
"        ns.append(node)\n",
"        ms.append(monad)\n",
"        gw += translit\n",
"        heb += text\n",
"    lus = show_heb(heb)\n",
"    wnumsrep = ', '.join(str(wnum) for wnum in wnums)\n",
"    msrep = ', '.join(str(x) for x in ms)\n",
"    nsrep = ', '.join(str(x) for x in ns)\n",
"\n",
"    t = ''\n",
"    t += '{} words {}\\n'.format(\n",
"        passage, wnumsrep, passage, wnumsrep,\n",
"    )\n",
"    t += 'ETCBC4\\n'\n",
"    t += '

monad {}, node {}

\\n'.format(msrep, nsrep)\n",
"    t += '

{}

\\n'.format(heb)\n",
"    t += '

{}

\\n'.format(\n",
"        # the transliteration uses &, < and > as ordinary symbols\n",
"        html.escape(gw, quote=False),\n",
"    )\n",
"    for lu in lus:\n",
"        t += '{}\\n'.format(lu)\n",
"    ht = '{}{}{}{}{}'.format(\n",
"        show_case_init,\n",
"        show_case_table_init,\n",
"        t,\n",
"        show_case_table_final,\n",
"        show_case_final,\n",
"    )\n",
"    display(HTML(ht))\n",
"    oht = API4['outfile']('{}-{}.html'.format(passage.strip(), wnumsrep))\n",
"    try:\n",
"        oht.write(ht)\n",
"    finally:\n",
"        # close the handle even when the write fails\n",
"        oht.close()\n",
 " \n", "def show_heb(heb):\n", " return ['{}{:04X}{}'.format(\n", " tr.hebrew_mappingi[c] if c in tr.hebrew_mappingi else c, \n", " ord(c), \n", " unicodedata.name(c).replace('HEBREW ',''),\n", " ) for c in Transcription._decomp(unicodedata.normalize('NFKD', heb))]\n", "\n", "def show_hstring(heb):\n", " t = '''

{}

{}

'''.format(\n", " heb,\n", " '
'.join('{:04X}={}'.format(ord(c), unicodedata.name(c).replace('HEBREW ','')) for c in heb),\n", " )\n", " display(HTML(t))\n", " \n", "def _show_case(passage, wnum):\n", " g_n = {}\n", " g_m = {}\n", " g_w = {}\n", " g_h = {}\n", " l_u = {}\n", " good = True\n", " for v in (('3', etcbc3), ('4', etcbc4)):\n", " (vn, vindex) = v\n", " if (passage, wnum) not in vindex:\n", " print(\"No word {} in passage {} in etcbc{}\".format(wnum, passage, vn))\n", " good = False\n", " continue\n", " (g_n[vn], g_m[vn], g_w[vn], g_h[vn]) = vindex[(passage, wnum)]\n", " l_u[vn] = show_heb(g_h[vn])\n", " if not good: return 'ERROR'\n", " lx = max(len(l_u['3']), len(l_u['4']))\n", " ln = min(len(l_u['3']), len(l_u['4']))\n", "\n", " t = ''\n", " t += '{} word {}\\n'.format(passage, wnum, passage, wnum)\n", " t += 'ETCBC3ETCBC4\\n'\n", " t += '

monad {}, node {}

monad {}, node {}

\\n'.format(\n", " g_m['3'], g_n['3'], g_m['4'], g_n['4'],\n", " )\n", " t += '

{}

{}

\\n'.format(\n", " g_h['3'], g_h['4'],\n", " )\n", " t += '

{}

{}

\\n'.format(\n", " g_w['3'].replace('&', '&').replace('<', '<').replace('>', '>'), \n", " g_w['4'].replace('&', '&').replace('<', '<').replace('>', '>'),\n", " )\n", " for r in range(lx):\n", " t += '{}{}\\n'.format(' class=\"hdiv\"' if r >= ln or l_u['3'][r] != l_u['4'][r] else '',\n", " l_u['3'][r] if r < len(l_u['3']) else ' ',\n", " l_u['4'][r] if r < len(l_u['4']) else ' ',\n", " )\n", " return t\n", "\n", "def show_case(passage, wnum):\n", " t = '{}{}{}{}{}'.format(\n", " show_case_init, \n", " show_case_table_init, \n", " _show_case(passage, wnum),\n", " show_case_table_final,\n", " show_case_final)\n", " display(HTML(t))\n", "\n", "def write_cases(elist, oh):\n", " oh.write(show_case_table_init)\n", " for e in elist: oh.write(_show_case(*e))\n", " oh.write(show_case_table_final)\n", " " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "check_dagesh_accent_vowel_pos('\\u05DE', '\\u0596')\n", "check_accent_vowel_pos('\\u05DE', '\\u0596')\n" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596 \u05de\u05b0\u05bc\u0596
 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596 \u05de\u05b1\u05bc\u0596
 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596 \u05de\u05b2\u05bc\u0596
 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596 \u05de\u05b3\u05bc\u0596
 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596 \u05de\u05b4\u05bc\u0596
 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596 \u05de\u05b5\u05bc\u0596
 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596 \u05de\u05b6\u05bc\u0596
 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596 \u05de\u05b7\u05bc\u0596
 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596 \u05de\u05b8\u05bc\u0596
 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596 \u05de\u05b9\u05bc\u0596
 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596 \u05de\u05ba\u05bc\u0596
 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596 \u05de\u05bb\u05bc\u0596
\n", "\n" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
 \u05de\u05b0\u0596 \u05de\u05b0\u0596 \u05de\u05b0\u0596 \u05de\u05b0\u0596
 \u05de\u05b1\u0596 \u05de\u05b1\u0596 \u05de\u05b1\u0596 \u05de\u05b1\u0596
 \u05de\u05b2\u0596 \u05de\u05b2\u0596 \u05de\u05b2\u0596 \u05de\u05b2\u0596
 \u05de\u05b3\u0596 \u05de\u05b3\u0596 \u05de\u05b3\u0596 \u05de\u05b3\u0596
 \u05de\u05b4\u0596 \u05de\u05b4\u0596 \u05de\u05b4\u0596 \u05de\u05b4\u0596
 \u05de\u05b5\u0596 \u05de\u05b5\u0596 \u05de\u05b5\u0596 \u05de\u05b5\u0596
 \u05de\u05b6\u0596 \u05de\u05b6\u0596 \u05de\u05b6\u0596 \u05de\u05b6\u0596
 \u05de\u05b7\u0596 \u05de\u05b7\u0596 \u05de\u05b7\u0596 \u05de\u05b7\u0596
 \u05de\u05b8\u0596 \u05de\u05b8\u0596 \u05de\u05b8\u0596 \u05de\u05b8\u0596
 \u05de\u05b9\u0596 \u05de\u05b9\u0596 \u05de\u05b9\u0596 \u05de\u05b9\u0596
 \u05de\u05ba\u0596 \u05de\u05ba\u0596 \u05de\u05ba\u0596 \u05de\u05ba\u0596
 \u05de\u05bb\u0596 \u05de\u05bb\u0596 \u05de\u05bb\u0596 \u05de\u05bb\u0596
\n", "\n" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "hebuni('\\u05C6\\u0307')" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "

 \u05c6\u0307

" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] } ], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "F = API4['F']\n", "show_hstring('\u05d0\u05b7\u059a\u05e3')\n", "show_hstring(F.g_word_utf8.v(1186))\n", "show_hstring('\u05d4\u05a0\u05d5\u05bc\u05d0')\n", "n = 76786\n", "show_hstring(F.g_word_utf8.v(n) + F.trailer_utf8.v(n).replace('\\n', ''))" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "

\u05d0\u05b7\u059a\u05e3

05D0=LETTER ALEF
05B7=POINT PATAH
059A=ACCENT YETIV
05E3=LETTER FINAL PE

" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "

\u05d0\u05b7\u059a\u05e3

05D0=LETTER ALEF
05B7=POINT PATAH
059A=ACCENT YETIV
05E3=LETTER FINAL PE

" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "

\u05d4\u05a0\u05d5\u05bc\u05d0

05D4=LETTER HE
05A0=ACCENT TELISHA GEDOLA
05D5=LETTER VAV
05BC=POINT DAGESH OR MAPIQ
05D0=LETTER ALEF

" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "

\u05d9\u05b4\u05e9\u05b0\u05c2\u05e8\u05b8\u05d0\u05b5\u05bd\u05dc\u05c3 \u05c6\u0307 \u05e4

05D9=LETTER YOD
05B4=POINT HIRIQ
FB2B=LETTER SHIN WITH SIN DOT
05B0=POINT SHEVA
05E8=LETTER RESH
05B8=POINT QAMATS
05D0=LETTER ALEF
05B5=POINT TSERE
05BD=POINT METEG
05DC=LETTER LAMED
05C3=PUNCTUATION SOF PASUQ
0020=SPACE
05C6=PUNCTUATION NUN HAFUKHA
0307=COMBINING DOT ABOVE
0020=SPACE
05E4=LETTER PE

" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "show_case(' JES 19,18', 23)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
JES 19,18 word 23
ETCBC3ETCBC4

monad 219372, node 218383

monad 218425, node 218424

\u05d9\u05b5\u05d0\u05b8\u05de\u05b5\u0596\u05e8

\u05d9\u05b5\u05d0\u05b8\u05de\u05b5\u0596\u05e8

J;>@M;73R

J;>@M;73R

J05D9LETTER YODJ05D9LETTER YOD
;05B5POINT TSERE;05B5POINT TSERE
>05D0LETTER ALEF>05D0LETTER ALEF
@05B8POINT QAMATS@05B8POINT QAMATS
M05DELETTER MEMM05DELETTER MEM
;05B5POINT TSERE;05B5POINT TSERE
730596ACCENT TIPEHA730596ACCENT TIPEHA
R05E8LETTER RESHR05E8LETTER RESH
_0020SPACE_0020SPACE
\n", "\n" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "show_range(' GEN 01,03', range(6,9))" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
GEN 01,03 words 6, 7, 8
ETCBC4

monad 36, 37, 38, node 37, 38, 39

\u05d5\u05b7\u05bd\u05d9\u05b0\u05d4\u05b4\u05d9\u05be\u05d0\u05b9\u05bd\u05d5\u05e8\u05c3

WA75-J:HIJ&>O75WR00

W05D5LETTER VAV
A05B7POINT PATAH
3505BDPOINT METEG
J05D9LETTER YOD
:05B0POINT SHEVA
H05D4LETTER HE
I05B4POINT HIRIQ
J05D9LETTER YOD
&05BEPUNCTUATION MAQAF
>05D0LETTER ALEF
O05B9POINT HOLAM
3505BDPOINT METEG
W05D5LETTER VAV
R05E8LETTER RESH
0005C3PUNCTUATION SOF PASUQ
\n", "\n" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] } ], "prompt_number": 35 }, { "cell_type": "code", "collapsed": false, "input": [ "for i in range(6,9):\n", " show_case(' GEN 01,03', i)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
GEN 01,03 word 6
ETCBC3ETCBC4

monad 37, node 36

monad 37, node 36

\u05d5\u05b7\u05bd

\u05d5\u05b7\u05bd

WA75-

WA75-

W05D5LETTER VAVW05D5LETTER VAV
A05B7POINT PATAHA05B7POINT PATAH
3505BDPOINT METEG3505BDPOINT METEG
\n", "\n" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
GEN 01,03 word 7
ETCBC3ETCBC4

monad 38, node 37

monad 38, node 37

\u05d9\u05b0\u05d4\u05b4\u05d9\u05be

\u05d9\u05b0\u05d4\u05b4\u05d9\u05be

J:HIJ&

J:HIJ&

J05D9LETTER YODJ05D9LETTER YOD
:05B0POINT SHEVA:05B0POINT SHEVA
H05D4LETTER HEH05D4LETTER HE
I05B4POINT HIRIQI05B4POINT HIRIQ
J05D9LETTER YODJ05D9LETTER YOD
&05BEPUNCTUATION MAQAF&05BEPUNCTUATION MAQAF
\n", "\n" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
GEN 01,03 word 8
ETCBC3ETCBC4

monad 39, node 38

monad 39, node 38

\u05d0\u05b9\u05bd\u05d5\u05e8\u05c3

\u05d0\u05b9\u05bd\u05d5\u05e8\u05c3

>O75WR00

>O75WR00

>05D0LETTER ALEF>05D0LETTER ALEF
O05B9POINT HOLAMO05B9POINT HOLAM
3505BDPOINT METEG3505BDPOINT METEG
W05D5LETTER VAVW05D5LETTER VAV
R05E8LETTER RESHR05E8LETTER RESH
0005C3PUNCTUATION SOF PASUQ0005C3PUNCTUATION SOF PASUQ
\n", "\n" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] } ], "prompt_number": 27 }, { "cell_type": "code", "collapsed": false, "input": [ "missing_in_3 = collections.OrderedDict()\n", "missing_in_4 = collections.OrderedDict()\n", "different = collections.OrderedDict()\n", "\n", "codes = collections.OrderedDict((\n", " ('q', 'qetiv/qere'),\n", " ('m', 'merecha -silluq-tifcha-mehuppach-nothing'),\n", " ('n', 'nun hafukha'),\n", " ('d', 'dagesh'),\n", " (None, 'NOT ANALYSED'),\n", "))\n", "\n", "for e in etcbc3:\n", " if e not in etcbc4: missing_in_4[e] = None\n", "for e in etcbc4:\n", " if e not in etcbc3: missing_in_3[e] = None\n", "\n", "for e in etcbc4:\n", " if e in etcbc3:\n", " un3 = Transcription._decomp(unicodedata.normalize('NFKD', etcbc3[e][3]))\n", " un4 = Transcription._decomp(unicodedata.normalize('NFKD', etcbc4[e][3]))\n", " if un3 != un4:\n", " code = None\n", " if '\\u05AF' in un4:\n", " code = 'q'\n", " elif un3.replace('\\u05A5', '').replace('\\u05BD', '').replace('\\u0596', '').replace('\\u05A4', '').replace('\\u05BE', '') == un4.replace('\\u05A5', '').replace('\\u05BD', '').replace('\\u0596', '').replace('\\u05A4', '').replace('\\u05BE', ''):\n", " code = 'm'\n", " elif '\\u05C6' in un3 + un4:\n", " code = 'n'\n", " elif un3.replace('\\u05BC', '') == un4.replace('\\u05BC', ''):\n", " code = 'd'\n", " different[e] = code\n", "\n", "print(\"Missing in etcbc3: {} words\".format(len(missing_in_3)))\n", "print(\"Missing in etcbc4: {} words\".format(len(missing_in_4)))\n", "print(\"Different in etcbc3 and etcbc4: {} words\".format(len(different)))\n", "\n", "for code in codes:\n", " print(\"{}x : {}\".format(len([e for e in different if different[e] == code]), codes[code]))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Missing in etcbc3: 2 words\n", "Missing in etcbc4: 1 words\n", "Different in etcbc3 and etcbc4: 2020 words\n", "1892x : qetiv/qere\n", "52x : merecha 
-silluq-tifcha-mehuppach-nothing\n", "9x : nun hafukha\n", "11x : dagesh\n", "56x : NOT ANALYSED\n" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "ohtest = API4['outfile']('test.html')\n", "testcases = [(' EXO 32,17', i) for i in range(1,11)]\n", "write_cases(testcases, ohtest)\n", "ohtest.close()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "print(missing_in_4)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "OrderedDict([((' EZE 18,14', 19), None)])\n" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "ohf = API4['outfile']('comp.txt')\n", "ohe = API4['outfile']('cases.html')\n", "toc = [\"

Table of Contents

\\n\"]\n", "for code in reversed(codes):\n", " if code == 'q': continue\n", " desc = codes[code]\n", " toc.append('

Cases of {}

\\n'.format(code, desc))\n", " cases = list(e for e in different if different[e] == code)\n", " for (p,w) in cases:\n", " toc.append('{}w{}\\n'.format(p, w, p.replace(' ',''), w))\n", "ohe.write(show_case_init)\n", "ohe.write(''.join(toc))\n", "for code in reversed(codes):\n", " if code == 'q': continue\n", " desc = codes[code]\n", " cases = list(e for e in different if different[e] == code)\n", " ohe.write('

Cases of {}

\\n'.format(code, desc))\n", " write_cases(cases, ohe)\n", " ohf.write('# {}\\n'.format(desc))\n",
 " # end every record with a newline so that the next '# ...' header starts on a line of its own\n",
 " ohf.write(''.join('{}\\t{}\\t{}\\n'.format(e[0], e[1], 'x') for e in cases))\n",
 "ohe.write(show_case_final)\n", "ohf.close()\n", "ohe.close()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "def show_swap(p, wn, ws):\n", " F3 = API3['F']\n", " F4 = API4['F']\n", " v3n = etcbc3n[p]\n", " v4n = etcbc4n[p]\n", " v3text = ''.join('{}{}'.format(F3.text.v(n), F3.suffix.v(n)) for n in v3n)\n", " v4text = ''.join('{}{}'.format(F4.g_word_utf8.v(n), F4.trailer_utf8.v(n)) for n in v4n)\n", " d_tpl = '''\n", "\n", " {} word {} m={} n={}\n", " {}\n", "  {}\n", " {}\n", "  {}\n", "  {}\n", "'''\n", " v_tpl = '{}

 {}

'\n",
 " # one placeholder per argument: with only five, the two closing fragments were silently dropped\n",
 " t = '{}{}{}{}{}{}{}'.format(\n",
 " show_case_init,\n", " show_case_table_init,\n", " d_tpl.format(p, wn, *ws),\n", " v_tpl.format('ETCBC3', v3text),\n", " v_tpl.format('ETCBC4', v4text),\n", " show_case_table_final,\n", " show_case_final,\n", " )\n", " display(HTML(t))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "NN = API4['NN']\n", "F = API4['F']\n", "msg = API4['msg']\n", "\n", "def find_swaps():\n", " diffs = collections.OrderedDict()\n", " cur_label = None\n", " cur_w = 0\n", " for n in NN():\n", " otype = F.otype.v(n)\n", " if otype == 'verse':\n", " cur_label = F.label.v(n)\n", " cur_w = 0\n", " continue\n", " elif otype == 'word':\n", " cur_w += 1\n", " translit = F.g_word.v(n)\n", " translit_sw = Transcription.swap_accent_pat.sub(Transcription._swap_accent, translit)\n", " if translit != translit_sw:\n", " diffs[(cur_label, cur_w)] = (\n", " F.monads.v(n),\n", " n,\n", " translit, \n", " Transcription.to_hebrew_x(translit), \n", " translit_sw, \n", " Transcription.to_hebrew_x(translit_sw), \n", " F.g_word_utf8.v(n),\n", " )\n", " print(len(diffs))\n", " return diffs\n", "\n", "swaps = find_swaps()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "7916\n" ] } ], "prompt_number": 20 }, { "cell_type": "code", "collapsed": false, "input": [ "oh = API4['outfile']('swaps.txt')\n", "i = 0\n", "limit = 10\n", "for (p, wn) in swaps:\n", " i += 1\n", " if limit != None and i > limit: break\n", " show_swap(p, wn, swaps[(p, wn)])\n", " info = swaps[(p, wn)]\n", " oh.write(\"={}\\n#{}\\n\\n\".format(info[0], info[2]))\n", "oh.close()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "
GEN 01,11 word 8 m=186 n=18510<;FEB \u059a\u05e2\u05b5\u05e9\u05b6\u05c2\u05d1<;10FEB \u05e2\u05b5\u059a\u05e9\u05b6\u05c2\u05d1 \u05e2\u05b5\u059a\u05e9\u05b6\u05c2\u05d1
ETCBC3

 \u05d5\u05b7\u05d9\u05b9\u05bc\u05a3\u05d0\u05de\u05b6\u05e8 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u0597\u05d9\u05dd \u05ea\u05b7\u05bc\u05bd\u05d3\u05b0\u05e9\u05b5\u05c1\u05a4\u05d0 \u05d4\u05b8\u05d0\u05b8\u05a8\u05e8\u05b6\u05e5\u0599 \u05d3\u05b6\u05bc\u0594\u05e9\u05b6\u05c1\u05d0 \u05e2\u05b5\u059a\u05e9\u05b6\u05c2\u05d1 \u05de\u05b7\u05d6\u05b0\u05e8\u05b4\u05a3\u05d9\u05e2\u05b7 \u05d6\u05b6\u0594\u05e8\u05b7\u05e2 \u05e2\u05b5\u05a3\u05e5 \u05e4\u05b0\u05bc\u05e8\u05b4\u059e\u05d9 \u05e2\u05b9\u05a4\u05e9\u05b6\u05c2\u05d4 \u05e4\u05b0\u05bc\u05e8\u05b4\u05d9\u0599 \u05dc\u05b0\u05de\u05b4\u05d9\u05e0\u05b9\u0594\u05d5 \u05d0\u05b2\u05e9\u05b6\u05c1\u05a5\u05e8 \u05d6\u05b7\u05e8\u05b0\u05e2\u05b9\u05d5\u05be\u05d1\u05b9\u0596\u05d5 \u05e2\u05b7\u05dc\u05be\u05d4\u05b8\u05d0\u05b8\u0591\u05e8\u05b6\u05e5 \u05d5\u05b7\u05bd\u05d9\u05b0\u05d4\u05b4\u05d9\u05be\u05db\u05b5\u05bd\u05df\u05c3

ETCBC4

 \u05d5\u05b7\u05d9\u05b9\u05bc\u05a3\u05d0\u05de\u05b6\u05e8 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u0597\u05d9\u05dd \u05ea\u05b7\u05bc\u05bd\u05d3\u05b0\u05e9\u05b5\u05c1\u05a4\u05d0 \u05d4\u05b8\u05d0\u05b8\u05a8\u05e8\u05b6\u05e5\u0599 \u05d3\u05b6\u05bc\u0594\u05e9\u05b6\u05c1\u05d0 \u05e2\u05b5\u059a\u05e9\u05b6\u05c2\u05d1 \u05de\u05b7\u05d6\u05b0\u05e8\u05b4\u05a3\u05d9\u05e2\u05b7 \u05d6\u05b6\u0594\u05e8\u05b7\u05e2 \u05e2\u05b5\u05a3\u05e5 \u05e4\u05b0\u05bc\u05e8\u05b4\u059e\u05d9 \u05e2\u05b9\u05a4\u05e9\u05b6\u05c2\u05d4 \u05e4\u05b0\u05bc\u05e8\u05b4\u05d9\u0599 \u05dc\u05b0\u05de\u05b4\u05d9\u05e0\u05b9\u0594\u05d5 \u05d0\u05b2\u05e9\u05b6\u05c1\u05a5\u05e8 \u05d6\u05b7\u05e8\u05b0\u05e2\u05b9\u05d5\u05be\u05d1\u05b9\u0596\u05d5 \u05e2\u05b7\u05dc\u05be\u05d4\u05b8\u05d0\u05b8\u0591\u05e8\u05b6\u05e5 \u05d5\u05b7\u05bd\u05d9\u05b0\u05d4\u05b4\u05d9\u05be\u05db\u05b5\u05bd\u05df\u05c3\n", "

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "
GEN 01,12 word 5 m=208 n=20714D.ECE> \u05a0\u05d3\u05b6\u05bc\u05e9\u05b6\u05c1\u05d0D.E14CE> \u05d3\u05b6\u05bc\u05a0\u05e9\u05b6\u05c1\u05d0 \u05d3\u05b6\u05bc\u05a0\u05e9\u05b6\u05c1\u05d0
ETCBC3

 \u05d5\u05b7\u05ea\u05b9\u05bc\u05d5\u05e6\u05b5\u05a8\u05d0 \u05d4\u05b8\u05d0\u05b8\u059c\u05e8\u05b6\u05e5 \u05d3\u05b6\u05bc\u05a0\u05e9\u05b6\u05c1\u05d0 \u05e2\u05b5\u05a3\u05e9\u05b6\u05c2\u05d1 \u05de\u05b7\u05d6\u05b0\u05e8\u05b4\u05a4\u05d9\u05e2\u05b7 \u05d6\u05b6\u05a8\u05e8\u05b7\u05e2\u0599 \u05dc\u05b0\u05de\u05b4\u05d9\u05e0\u05b5\u0594\u05d4\u05d5\u05bc \u05d5\u05b0\u05e2\u05b5\u05a7\u05e5 \u05e2\u05b9\u05bd\u05e9\u05b6\u05c2\u05d4\u05be\u05e4\u05b0\u05bc\u05e8\u05b4\u059b\u05d9 \u05d0\u05b2\u05e9\u05b6\u05c1\u05a5\u05e8 \u05d6\u05b7\u05e8\u05b0\u05e2\u05b9\u05d5\u05be\u05d1\u05b9\u0596\u05d5 \u05dc\u05b0\u05de\u05b4\u05d9\u05e0\u05b5\u0591\u05d4\u05d5\u05bc \u05d5\u05b7\u05d9\u05b7\u05bc\u05a5\u05e8\u05b0\u05d0 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u0596\u05d9\u05dd \u05db\u05b4\u05bc\u05d9\u05be\u05d8\u05b9\u05bd\u05d5\u05d1\u05c3

ETCBC4

 \u05d5\u05b7\u05ea\u05b9\u05bc\u05d5\u05e6\u05b5\u05a8\u05d0 \u05d4\u05b8\u05d0\u05b8\u059c\u05e8\u05b6\u05e5 \u05d3\u05b6\u05bc\u05a0\u05e9\u05b6\u05c1\u05d0 \u05e2\u05b5\u05a3\u05e9\u05b6\u05c2\u05d1 \u05de\u05b7\u05d6\u05b0\u05e8\u05b4\u05a4\u05d9\u05e2\u05b7 \u05d6\u05b6\u05a8\u05e8\u05b7\u05e2\u0599 \u05dc\u05b0\u05de\u05b4\u05d9\u05e0\u05b5\u0594\u05d4\u05d5\u05bc \u05d5\u05b0\u05e2\u05b5\u05a7\u05e5 \u05e2\u05b9\u05a5\u05e9\u05b6\u05c2\u05d4 \u05e4\u05b0\u05bc\u05e8\u05b4\u059b\u05d9 \u05d0\u05b2\u05e9\u05b6\u05c1\u05a5\u05e8 \u05d6\u05b7\u05e8\u05b0\u05e2\u05b9\u05d5\u05be\u05d1\u05b9\u0596\u05d5 \u05dc\u05b0\u05de\u05b4\u05d9\u05e0\u05b5\u0591\u05d4\u05d5\u05bc \u05d5\u05b7\u05d9\u05b7\u05bc\u05a5\u05e8\u05b0\u05d0 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u0596\u05d9\u05dd \u05db\u05b4\u05bc\u05d9\u05be\u05d8\u05b9\u05bd\u05d5\u05d1\u05c3\n", "

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "
GEN 01,30 word 5 m=626 n=62514H@- \u05a0\u05d4\u05b8-H@14- \u05d4\u05b8\u05a0- \u05d4\u05b8\u05a0
ETCBC3

 \u05d5\u05bc\u05bd\u05dc\u05b0\u05db\u05b8\u05dc\u05be\u05d7\u05b7\u05d9\u05b7\u05bc\u05a3\u05ea \u05d4\u05b8\u05a0\u05d0\u05b8\u05e8\u05b6\u05e5 \u05d5\u05bc\u05dc\u05b0\u05db\u05b8\u05dc\u05be\u05e2\u05b9\u05a8\u05d5\u05e3 \u05d4\u05b7\u05e9\u05b8\u05bc\u05c1\u05de\u05b7\u059c\u05d9\u05b4\u05dd \u05d5\u05bc\u05dc\u05b0\u05db\u05b9\u05a3\u05dc \u05c0 \u05e8\u05b9\u05d5\u05de\u05b5\u05a3\u05e9\u05c2 \u05e2\u05b7\u05dc\u05be\u05d4\u05b8\u05d0\u05b8\u0597\u05e8\u05b6\u05e5 \u05d0\u05b2\u05e9\u05b6\u05c1\u05e8\u05be\u05d1\u05b9\u05bc\u05d5\u0599 \u05e0\u05b6\u05a3\u05e4\u05b6\u05e9\u05c1 \u05d7\u05b7\u05d9\u05b8\u05bc\u0594\u05d4 \u05d0\u05b6\u05ea\u05be\u05db\u05b8\u05bc\u05dc\u05be\u05d9\u05b6\u05a5\u05e8\u05b6\u05e7 \u05e2\u05b5\u0596\u05e9\u05b6\u05c2\u05d1 \u05dc\u05b0\u05d0\u05b8\u05db\u05b0\u05dc\u05b8\u0591\u05d4 \u05d5\u05b7\u05bd\u05d9\u05b0\u05d4\u05b4\u05d9\u05be\u05db\u05b5\u05bd\u05df\u05c3

ETCBC4

 \u05d5\u05bc\u05bd\u05dc\u05b0\u05db\u05b8\u05dc\u05be\u05d7\u05b7\u05d9\u05b7\u05bc\u05a3\u05ea \u05d4\u05b8\u05a0\u05d0\u05b8\u05e8\u05b6\u05e5 \u05d5\u05bc\u05dc\u05b0\u05db\u05b8\u05dc\u05be\u05e2\u05b9\u05a8\u05d5\u05e3 \u05d4\u05b7\u05e9\u05b8\u05bc\u05c1\u05de\u05b7\u059c\u05d9\u05b4\u05dd \u05d5\u05bc\u05dc\u05b0\u05db\u05b9\u05a3\u05dc \u05c0 \u05e8\u05b9\u05d5\u05de\u05b5\u05a3\u05e9\u05c2 \u05e2\u05b7\u05dc\u05be\u05d4\u05b8\u05d0\u05b8\u0597\u05e8\u05b6\u05e5 \u05d0\u05b2\u05e9\u05b6\u05c1\u05e8\u05be\u05d1\u05b9\u05bc\u05d5\u0599 \u05e0\u05b6\u05a3\u05e4\u05b6\u05e9\u05c1 \u05d7\u05b7\u05d9\u05b8\u05bc\u0594\u05d4 \u05d0\u05b6\u05ea\u05be\u05db\u05b8\u05bc\u05dc\u05be\u05d9\u05b6\u05a5\u05e8\u05b6\u05e7 \u05e2\u05b5\u0596\u05e9\u05b6\u05c2\u05d1 \u05dc\u05b0\u05d0\u05b8\u05db\u05b0\u05dc\u05b8\u0591\u05d4 \u05d5\u05b7\u05bd\u05d9\u05b0\u05d4\u05b4\u05d9\u05be\u05db\u05b5\u05bd\u05df\u05c3\n", "

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "
GEN 02,05 word 6 m=750 n=74910VEREM \u059a\u05d8\u05b6\u05e8\u05b6\u05deVE10REM \u05d8\u05b6\u059a\u05e8\u05b6\u05de \u05d8\u05b6\u059a\u05e8\u05b6\u05dd
ETCBC3

 \u05d5\u05b0\u05db\u05b9\u05a3\u05dc \u05c0 \u05e9\u05b4\u05c2\u05a3\u05d9\u05d7\u05b7 \u05d4\u05b7\u05e9\u05b8\u05bc\u05c2\u05d3\u05b6\u0597\u05d4 \u05d8\u05b6\u059a\u05e8\u05b6\u05dd \u05d9\u05b4\u05bd\u05d4\u05b0\u05d9\u05b6\u05a3\u05d4 \u05d1\u05b8\u05d0\u05b8\u0594\u05e8\u05b6\u05e5 \u05d5\u05b0\u05db\u05b8\u05dc\u05be\u05e2\u05b5\u05a5\u05e9\u05b6\u05c2\u05d1 \u05d4\u05b7\u05e9\u05b8\u05bc\u05c2\u05d3\u05b6\u0596\u05d4 \u05d8\u05b6\u05a3\u05e8\u05b6\u05dd \u05d9\u05b4\u05e6\u05b0\u05de\u05b8\u0591\u05d7 \u05db\u05b4\u05bc\u05d9\u05a9 \u05dc\u05b9\u05a8\u05d0 \u05d4\u05b4\u05de\u05b0\u05d8\u05b4\u059c\u05d9\u05e8 \u05d9\u05b0\u05d4\u05d5\u05b8\u05a4\u05d4 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u05d9\u05dd\u0599 \u05e2\u05b7\u05dc\u05be\u05d4\u05b8\u05d0\u05b8\u0594\u05e8\u05b6\u05e5 \u05d5\u05b0\u05d0\u05b8\u05d3\u05b8\u05a3\u05dd \u05d0\u05b7\u0594\u05d9\u05b4\u05df \u05dc\u05b7\u05bd\u05e2\u05b2\u05d1\u05b9\u0596\u05d3 \u05d0\u05b6\u05ea\u05be\u05d4\u05b8\u05bd\u05d0\u05b2\u05d3\u05b8\u05de\u05b8\u05bd\u05d4\u05c3

ETCBC4

 \u05d5\u05b0\u05db\u05b9\u05a3\u05dc \u05c0 \u05e9\u05b4\u05c2\u05a3\u05d9\u05d7\u05b7 \u05d4\u05b7\u05e9\u05b8\u05bc\u05c2\u05d3\u05b6\u0597\u05d4 \u05d8\u05b6\u059a\u05e8\u05b6\u05dd \u05d9\u05b4\u05bd\u05d4\u05b0\u05d9\u05b6\u05a3\u05d4 \u05d1\u05b8\u05d0\u05b8\u0594\u05e8\u05b6\u05e5 \u05d5\u05b0\u05db\u05b8\u05dc\u05be\u05e2\u05b5\u05a5\u05e9\u05b6\u05c2\u05d1 \u05d4\u05b7\u05e9\u05b8\u05bc\u05c2\u05d3\u05b6\u0596\u05d4 \u05d8\u05b6\u05a3\u05e8\u05b6\u05dd \u05d9\u05b4\u05e6\u05b0\u05de\u05b8\u0591\u05d7 \u05db\u05b4\u05bc\u05d9\u05a9 \u05dc\u05b9\u05a8\u05d0 \u05d4\u05b4\u05de\u05b0\u05d8\u05b4\u059c\u05d9\u05e8 \u05d9\u05b0\u05d4\u05d5\u05b8\u05a4\u05d4 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u05d9\u05dd\u0599 \u05e2\u05b7\u05dc\u05be\u05d4\u05b8\u05d0\u05b8\u0594\u05e8\u05b6\u05e5 \u05d5\u05b0\u05d0\u05b8\u05d3\u05b8\u05a3\u05dd \u05d0\u05b7\u0594\u05d9\u05b4\u05df \u05dc\u05b7\u05bd\u05e2\u05b2\u05d1\u05b9\u0596\u05d3 \u05d0\u05b6\u05ea\u05be\u05d4\u05b8\u05bd\u05d0\u05b2\u05d3\u05b8\u05de\u05b8\u05bd\u05d4\u05c3\n", "

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "
GEN 02,11 word 8 m=889 n=88810>;T \u059a\u05d0\u05b5\u05ea>;10T \u05d0\u05b5\u059a\u05ea \u05d0\u05b5\u059a\u05ea
ETCBC3

 \u05e9\u05b5\u05c1\u05a5\u05dd \u05d4\u05b8\u05bd\u05d0\u05b6\u05d7\u05b8\u0596\u05d3 \u05e4\u05b4\u05bc\u05d9\u05e9\u05b9\u05c1\u0591\u05d5\u05df \u05d4\u05a3\u05d5\u05bc\u05d0 \u05d4\u05b7\u05e1\u05b9\u05bc\u05d1\u05b5\u0597\u05d1 \u05d0\u05b5\u059a\u05ea \u05db\u05b8\u05bc\u05dc\u05be\u05d0\u05b6\u05a3\u05e8\u05b6\u05e5 \u05d4\u05b7\u05bd\u05d7\u05b2\u05d5\u05b4\u05d9\u05dc\u05b8\u0594\u05d4 \u05d0\u05b2\u05e9\u05b6\u05c1\u05e8\u05be\u05e9\u05b8\u05c1\u0596\u05dd \u05d4\u05b7\u05d6\u05b8\u05bc\u05d4\u05b8\u05bd\u05d1\u05c3

ETCBC4

 \u05e9\u05b5\u05c1\u05a5\u05dd \u05d4\u05b8\u05bd\u05d0\u05b6\u05d7\u05b8\u0596\u05d3 \u05e4\u05b4\u05bc\u05d9\u05e9\u05b9\u05c1\u0591\u05d5\u05df \u05d4\u05a3\u05d5\u05bc\u05d0 \u05d4\u05b7\u05e1\u05b9\u05bc\u05d1\u05b5\u0597\u05d1 \u05d0\u05b5\u059a\u05ea \u05db\u05b8\u05bc\u05dc\u05be\u05d0\u05b6\u05a3\u05e8\u05b6\u05e5 \u05d4\u05b7\u05bd\u05d7\u05b2\u05d5\u05b4\u05d9\u05dc\u05b8\u0594\u05d4 \u05d0\u05b2\u05e9\u05b6\u05c1\u05e8\u05be\u05e9\u05b8\u05c1\u0596\u05dd \u05d4\u05b7\u05d6\u05b8\u05bc\u05d4\u05b8\u05bd\u05d1\u05c3\n", "

\n", "\n", "\n", " \n", " \n", " \n", " \n", "" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "
GEN 02,23 word 8 m=1123 n=112210\n", "  \u059a\u05e2\u05b6\u05e6\u05b6\u05de\n", "  \u05e2\u05b6\u059a\u05e6\u05b6\u05de \u05e2\u05b6\u059a\u05e6\u05b6\u05dd
ETCBC3

 \u05d5\u05b7\u05d9\u05b9\u05bc\u05d0\u05de\u05b6\u05e8\u05ae \u05d4\u05b8\u05bd\u05d0\u05b8\u05d3\u05b8\u05dd\u0592 \u05d6\u05b9\u05a3\u05d0\u05ea \u05d4\u05b7\u05e4\u05b7\u05bc\u0597\u05e2\u05b7\u05dd \u05e2\u05b6\u059a\u05e6\u05b6\u05dd \u05de\u05b5\u05bd\u05e2\u05b2\u05e6\u05b8\u05de\u05b7\u0594\u05d9 \u05d5\u05bc\u05d1\u05b8\u05e9\u05b8\u05c2\u0596\u05e8 \u05de\u05b4\u05d1\u05b0\u05bc\u05e9\u05b8\u05c2\u05e8\u05b4\u0591\u05d9 \u05dc\u05b0\u05d6\u05b9\u05d0\u05ea\u0599 \u05d9\u05b4\u05e7\u05b8\u05bc\u05e8\u05b5\u05a3\u05d0 \u05d0\u05b4\u05e9\u05b8\u05bc\u05c1\u0594\u05d4 \u05db\u05b4\u05bc\u05a5\u05d9 \u05de\u05b5\u05d0\u05b4\u0596\u05d9\u05e9\u05c1 \u05dc\u05bb\u05bd\u05e7\u05b3\u05d7\u05b8\u05d4\u05be\u05d6\u05b9\u05bc\u05bd\u05d0\u05ea\u05c3

ETCBC4

 \u05d5\u05b7\u05d9\u05b9\u05bc\u05d0\u05de\u05b6\u05e8\u05ae \u05d4\u05b8\u05bd\u05d0\u05b8\u05d3\u05b8\u05dd\u0592 \u05d6\u05b9\u05a3\u05d0\u05ea \u05d4\u05b7\u05e4\u05b7\u05bc\u0597\u05e2\u05b7\u05dd \u05e2\u05b6\u059a\u05e6\u05b6\u05dd \u05de\u05b5\u05bd\u05e2\u05b2\u05e6\u05b8\u05de\u05b7\u0594\u05d9 \u05d5\u05bc\u05d1\u05b8\u05e9\u05b8\u05c2\u0596\u05e8 \u05de\u05b4\u05d1\u05b0\u05bc\u05e9\u05b8\u05c2\u05e8\u05b4\u0591\u05d9 \u05dc\u05b0\u05d6\u05b9\u05d0\u05ea\u0599 \u05d9\u05b4\u05e7\u05b8\u05bc\u05e8\u05b5\u05a3\u05d0 \u05d0\u05b4\u05e9\u05b8\u05bc\u05c1\u0594\u05d4 \u05db\u05b4\u05bc\u05a5\u05d9 \u05de\u05b5\u05d0\u05b4\u0596\u05d9\u05e9\u05c1 \u05dc\u05bb\u05bd\u05e7\u05b3\u05d7\u05b8\u05d4\u05be\u05d6\u05b9\u05bc\u05bd\u05d0\u05ea\u05c3\n", "

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "
GEN 03,01 word 20 m=1187 n=118610>AP \u059a\u05d0\u05b7\u05e4>A10P \u05d0\u05b7\u059a\u05e4 \u05d0\u05b7\u059a\u05e3
ETCBC3

 \u05d5\u05b0\u05d4\u05b7\u05e0\u05b8\u05bc\u05d7\u05b8\u05e9\u05c1\u0599 \u05d4\u05b8\u05d9\u05b8\u05a3\u05d4 \u05e2\u05b8\u05e8\u0594\u05d5\u05bc\u05dd \u05de\u05b4\u05db\u05b9\u05bc\u05dc\u0599 \u05d7\u05b7\u05d9\u05b7\u05bc\u05a3\u05ea \u05d4\u05b7\u05e9\u05b8\u05bc\u05c2\u05d3\u05b6\u0594\u05d4 \u05d0\u05b2\u05e9\u05b6\u05c1\u05a5\u05e8 \u05e2\u05b8\u05e9\u05b8\u05c2\u0596\u05d4 \u05d9\u05b0\u05d4\u05d5\u05b8\u05a3\u05d4 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u0591\u05d9\u05dd \u05d5\u05b7\u05d9\u05b9\u05bc\u05a8\u05d0\u05de\u05b6\u05e8\u0599 \u05d0\u05b6\u05dc\u05be\u05d4\u05b8\u05a3\u05d0\u05b4\u05e9\u05b8\u05bc\u05c1\u0594\u05d4 \u05d0\u05b7\u059a\u05e3 \u05db\u05b4\u05bc\u05bd\u05d9\u05be\u05d0\u05b8\u05de\u05b7\u05a3\u05e8 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u0594\u05d9\u05dd \u05dc\u05b9\u05a3\u05d0 \u05ea\u05b9\u05bd\u05d0\u05db\u05b0\u05dc\u0594\u05d5\u05bc \u05de\u05b4\u05db\u05b9\u05bc\u0596\u05dc \u05e2\u05b5\u05a5\u05e5 \u05d4\u05b7\u05d2\u05b8\u05bc\u05bd\u05df\u05c3

ETCBC4

 \u05d5\u05b0\u05d4\u05b7\u05e0\u05b8\u05bc\u05d7\u05b8\u05e9\u05c1\u0599 \u05d4\u05b8\u05d9\u05b8\u05a3\u05d4 \u05e2\u05b8\u05e8\u0594\u05d5\u05bc\u05dd \u05de\u05b4\u05db\u05b9\u05bc\u05dc\u0599 \u05d7\u05b7\u05d9\u05b7\u05bc\u05a3\u05ea \u05d4\u05b7\u05e9\u05b8\u05bc\u05c2\u05d3\u05b6\u0594\u05d4 \u05d0\u05b2\u05e9\u05b6\u05c1\u05a5\u05e8 \u05e2\u05b8\u05e9\u05b8\u05c2\u0596\u05d4 \u05d9\u05b0\u05d4\u05d5\u05b8\u05a3\u05d4 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u0591\u05d9\u05dd \u05d5\u05b7\u05d9\u05b9\u05bc\u05a8\u05d0\u05de\u05b6\u05e8\u0599 \u05d0\u05b6\u05dc\u05be\u05d4\u05b8\u05a3\u05d0\u05b4\u05e9\u05b8\u05bc\u05c1\u0594\u05d4 \u05d0\u05b7\u059a\u05e3 \u05db\u05b4\u05bc\u05bd\u05d9\u05be\u05d0\u05b8\u05de\u05b7\u05a3\u05e8 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u0594\u05d9\u05dd \u05dc\u05b9\u05a3\u05d0 \u05ea\u05b9\u05bd\u05d0\u05db\u05b0\u05dc\u0594\u05d5\u05bc \u05de\u05b4\u05db\u05b9\u05bc\u0596\u05dc \u05e2\u05b5\u05a5\u05e5 \u05d4\u05b7\u05d2\u05b8\u05bc\u05bd\u05df\u05c3\n", "

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "
GEN 03,05 word 1 m=1242 n=124110K.IJ \u059a\u05db\u05b4\u05bc\u05d9K.I10J \u05db\u05b4\u05bc\u059a\u05d9 \u05db\u05b4\u05bc\u059a\u05d9
ETCBC3

 \u05db\u05b4\u05bc\u059a\u05d9 \u05d9\u05b9\u05d3\u05b5\u05a3\u05e2\u05b7 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u0594\u05d9\u05dd \u05db\u05b4\u05bc\u0597\u05d9 \u05d1\u05b0\u05bc\u05d9\u05b9\u05d5\u05dd\u0599 \u05d0\u05b2\u05db\u05b8\u05dc\u05b0\u05db\u05b6\u05a3\u05dd \u05de\u05b4\u05de\u05b6\u05bc\u0594\u05e0\u05bc\u05d5\u05bc \u05d5\u05b0\u05e0\u05b4\u05e4\u05b0\u05e7\u05b0\u05d7\u0596\u05d5\u05bc \u05e2\u05b5\u05bd\u05d9\u05e0\u05b5\u05d9\u05db\u05b6\u0591\u05dd \u05d5\u05b4\u05d4\u05b0\u05d9\u05b4\u05d9\u05ea\u05b6\u05dd\u0599 \u05db\u05b5\u05bc\u05bd\u05d0\u05dc\u05b9\u05d4\u05b4\u0594\u05d9\u05dd \u05d9\u05b9\u05d3\u05b0\u05e2\u05b5\u0596\u05d9 \u05d8\u05b9\u05a5\u05d5\u05d1 \u05d5\u05b8\u05e8\u05b8\u05bd\u05e2\u05c3

ETCBC4

 \u05db\u05b4\u05bc\u059a\u05d9 \u05d9\u05b9\u05d3\u05b5\u05a3\u05e2\u05b7 \u05d0\u05b1\u05dc\u05b9\u05d4\u05b4\u0594\u05d9\u05dd \u05db\u05b4\u05bc\u0597\u05d9 \u05d1\u05b0\u05bc\u05d9\u05b9\u05d5\u05dd\u0599 \u05d0\u05b2\u05db\u05b8\u05dc\u05b0\u05db\u05b6\u05a3\u05dd \u05de\u05b4\u05de\u05b6\u05bc\u0594\u05e0\u05bc\u05d5\u05bc \u05d5\u05b0\u05e0\u05b4\u05e4\u05b0\u05e7\u05b0\u05d7\u0596\u05d5\u05bc \u05e2\u05b5\u05bd\u05d9\u05e0\u05b5\u05d9\u05db\u05b6\u0591\u05dd \u05d5\u05b4\u05d4\u05b0\u05d9\u05b4\u05d9\u05ea\u05b6\u05dd\u0599 \u05db\u05b5\u05bc\u05bd\u05d0\u05dc\u05b9\u05d4\u05b4\u0594\u05d9\u05dd \u05d9\u05b9\u05d3\u05b0\u05e2\u05b5\u0596\u05d9 \u05d8\u05b9\u05a5\u05d5\u05d1 \u05d5\u05b8\u05e8\u05b8\u05bd\u05e2\u05c3\n", "

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] }, { "html": [ "\n", "
GEN 03,11 word 3 m=1372 n=137110MIJ \u059a\u05de\u05b4\u05d9MI10J \u05de\u05b4\u059a\u05d9 \u05de\u05b4\u059a\u05d9
ETCBC3

 \u05d5\u05b7\u05d9\u05b9\u05bc\u0595\u05d0\u05de\u05b6\u05e8 \u05de\u05b4\u059a\u05d9 \u05d4\u05b4\u05d2\u05b4\u05bc\u05a3\u05d9\u05d3 \u05dc\u05b0\u05da\u05b8\u0594 \u05db\u05b4\u05bc\u05a5\u05d9 \u05e2\u05b5\u05d9\u05e8\u05b9\u0596\u05dd \u05d0\u05b8\u0591\u05ea\u05b8\u05bc\u05d4 \u05d4\u05b2\u05de\u05b4\u05df\u05be\u05d4\u05b8\u05e2\u05b5\u0597\u05e5 \u05d0\u05b2\u05e9\u05b6\u05c1\u05a7\u05e8 \u05e6\u05b4\u05d5\u05b4\u05bc\u05d9\u05ea\u05b4\u059b\u05d9\u05da\u05b8 \u05dc\u05b0\u05d1\u05b4\u05dc\u05b0\u05ea\u05b4\u05bc\u05a5\u05d9 \u05d0\u05b2\u05db\u05b8\u05dc\u05be\u05de\u05b4\u05de\u05b6\u05bc\u0596\u05e0\u05bc\u05d5\u05bc \u05d0\u05b8\u05db\u05b8\u05bd\u05dc\u05b0\u05ea\u05b8\u05bc\u05c3

ETCBC4

 \u05d5\u05b7\u05d9\u05b9\u05bc\u0595\u05d0\u05de\u05b6\u05e8 \u05de\u05b4\u059a\u05d9 \u05d4\u05b4\u05d2\u05b4\u05bc\u05a3\u05d9\u05d3 \u05dc\u05b0\u05da\u05b8\u0594 \u05db\u05b4\u05bc\u05a5\u05d9 \u05e2\u05b5\u05d9\u05e8\u05b9\u0596\u05dd \u05d0\u05b8\u0591\u05ea\u05b8\u05bc\u05d4 \u05d4\u05b2\u05de\u05b4\u05df\u05be\u05d4\u05b8\u05e2\u05b5\u0597\u05e5 \u05d0\u05b2\u05e9\u05b6\u05c1\u05a7\u05e8 \u05e6\u05b4\u05d5\u05b4\u05bc\u05d9\u05ea\u05b4\u059b\u05d9\u05da\u05b8 \u05dc\u05b0\u05d1\u05b4\u05dc\u05b0\u05ea\u05b4\u05bc\u05a5\u05d9 \u05d0\u05b2\u05db\u05b8\u05dc\u05be\u05de\u05b4\u05de\u05b6\u05bc\u0596\u05e0\u05bc\u05d5\u05bc \u05d0\u05b8\u05db\u05b8\u05bd\u05dc\u05b0\u05ea\u05b8\u05bc\u05c3\n", "

\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "" ], "metadata": {}, "output_type": "display_data", "text": [ "" ] } ], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "print(Transcription.to_hebrew_x('M.A33JIm03'))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\u05de\u05b7\u05bc\u05a8\u05d9\u05b4\u05dd\u0599\n" ] } ], "prompt_number": 36 }, { "cell_type": "code", "collapsed": false, "input": [ "import collections\n", "from IPython.display import clear_output, display, HTML\n", "from etcbc.lib import Transcription\n", "\n", "def htrans(tr):\n", " return Transcription.to_hebrew_x(tr)\n", "\n", "def outfile(name):\n", " h = open('/Users/dirk/Downloads/{}'.format(name), 'w')\n", " h.write('''\n", "\n", "\n", "\n", "\n", "
GEN 03,15 word 15 m=1470 n=146910HW.> \u059a\u05d4\u05d5\u05bc\u05d0H10W.> \u05d4\u059a\u05d5\u05bc\u05d0 \u05d4\u059a\u05d5\u05bc\u05d0
ETCBC3

 \u05d5\u05b0\u05d0\u05b5\u05d9\u05d1\u05b8\u05a3\u05d4 \u05c0 \u05d0\u05b8\u05e9\u05b4\u05c1\u0597\u05d9\u05ea \u05d1\u05b5\u05bc\u05bd\u05d9\u05e0\u05b0\u05da\u05b8\u0599 \u05d5\u05bc\u05d1\u05b5\u05a3\u05d9\u05df \u05d4\u05b8\u05bd\u05d0\u05b4\u05e9\u05b8\u05bc\u05c1\u0594\u05d4 \u05d5\u05bc\u05d1\u05b5\u05a5\u05d9\u05df \u05d6\u05b7\u05e8\u05b0\u05e2\u05b2\u05da\u05b8\u0596 \u05d5\u05bc\u05d1\u05b5\u05a3\u05d9\u05df \u05d6\u05b7\u05e8\u05b0\u05e2\u05b8\u0591\u05d4\u05bc \u05d4\u059a\u05d5\u05bc\u05d0 \u05d9\u05b0\u05e9\u05c1\u05d5\u05bc\u05e4\u05b0\u05da\u05b8\u05a3 \u05e8\u05b9\u0594\u05d0\u05e9\u05c1 \u05d5\u05b0\u05d0\u05b7\u05ea\u05b8\u05bc\u0596\u05d4 \u05ea\u05b0\u05bc\u05e9\u05c1\u05d5\u05bc\u05e4\u05b6\u05a5\u05e0\u05bc\u05d5\u05bc \u05e2\u05b8\u05e7\u05b5\u05bd\u05d1\u05c3 \u05e1

ETCBC4

 \u05d5\u05b0\u05d0\u05b5\u05d9\u05d1\u05b8\u05a3\u05d4 \u05c0 \u05d0\u05b8\u05e9\u05b4\u05c1\u0597\u05d9\u05ea \u05d1\u05b5\u05bc\u05bd\u05d9\u05e0\u05b0\u05da\u05b8\u0599 \u05d5\u05bc\u05d1\u05b5\u05a3\u05d9\u05df \u05d4\u05b8\u05bd\u05d0\u05b4\u05e9\u05b8\u05bc\u05c1\u0594\u05d4 \u05d5\u05bc\u05d1\u05b5\u05a5\u05d9\u05df \u05d6\u05b7\u05e8\u05b0\u05e2\u05b2\u05da\u05b8\u0596 \u05d5\u05bc\u05d1\u05b5\u05a3\u05d9\u05df \u05d6\u05b7\u05e8\u05b0\u05e2\u05b8\u0591\u05d4\u05bc \u05d4\u059a\u05d5\u05bc\u05d0 \u05d9\u05b0\u05e9\u05c1\u05d5\u05bc\u05e4\u05b0\u05da\u05b8\u05a3 \u05e8\u05b9\u0594\u05d0\u05e9\u05c1 \u05d5\u05b0\u05d0\u05b7\u05ea\u05b8\u05bc\u0596\u05d4 \u05ea\u05b0\u05bc\u05e9\u05c1\u05d5\u05bc\u05e4\u05b6\u05a5\u05e0\u05bc\u05d5\u05bc \u05e2\u05b8\u05e7\u05b5\u05bd\u05d1\u05c3 \u05e1\n", "

\n", "''')\n", " return h\n", "\n", "font = dict(\n", "sil='''\n", "font-family: Ezra SIL;\n", "font-size: 20pt;\n", "line-height:28pt;\n", "margin-right:0.5em;\n", "direction:rtl;\n", "unicode-bidi:bidi-override;\n", "text-align: right;\n", "''',\n", "sbl='''\n", "font-family: SBL Hebrew;\n", "font-size: 24pt;\n", "line-height:28pt;\n", "margin-right:0.5em;\n", "direction:rtl;\n", "unicode-bidi:bidi-override;\n", "text-align: right;\n", "''')\n", "\n", "hfile = outfile('hebtest.html')\n", "\n", "def hebnormal(heb):\n", " #print(\"{}\\n\".format(''.join(trans[c] for c in heb)))\n", " comps = heb.split(' ')\n", " plain = ''\n", " spanned = ''\n", " first = True\n", " sep = ''\n", " for comp in comps:\n", " plain += sep + ''.join(htrans(word) for word in comp.split('-'))\n", " spanned += sep + ''.join(''.format(word) + htrans(word) + '' for word in comp.split('-'))\n", " if sep == '': sep = ' '\n", " \n", " for f in sorted(font):\n", " for text in (spanned, plain):\n", " para = '''

{}

'''.format(font[f], text)\n", " display(HTML(para))\n", " hfile.write(para + '\\n')\n", "\n", "klegenda = ('adapted', 'spanned', 'plain')\n", "kcolor = (('#ffddbb','#ffeecc'), ('#ffbbbb','#ffcccc'), ('#bbffbb','#ccffcc'))\n", "plegenda = ('x y', 'x-y', 'xy')\n", "\n", "not_to_be_adapted = {\n", " '&', '.', '.c', '.f', '00', '01', '05', 'O',\n", "}\n", "to_be_adapted = {\n", " '*', ',', '02', '03', '04', '10', '11', '13', '14',\n", " '24', '33', '35', '44', '52', '53', '60', '61', '62',\n", " '63', '64', '65', '70', '71', '72', '73', '74', '75',\n", " '80', '81', '82', '83', '84', '85', '91', '92', '93',\n", " '94', '95', \n", " ':', ':@', ':A', ':E', ';', '@', 'A', 'E', 'I', 'U',\n", "}\n", "\n", "# 02, 03, 04, 10, 13, 24, 84: \n", "# sbl goes wrong in firefox: eats space in x y plain and adapted\n", "\n", "# 14, 44:\n", "# even after adaptation still very tight\n", "\n", "# @, A:\n", "# In SBL: heth discards more after-space than he\n", "\n", "cnotadapt = 0\n", "cadapt = 0\n", "cremaining = 0\n", "cskip = 0\n", "cdone = ''\n", "first = True\n", "for x in sorted(Transcription.hebrew_mapping):\n", " if (x.isalpha() and x not in {'A', 'E', 'I', 'O', 'U'}) or x in {'<', '>', '#'}: \n", " cskip +=1\n", " continue\n", " if x in {'55', '56', '57', '_'}:\n", " cskip += 1\n", " continue\n", " if x in not_to_be_adapted: \n", " cnotadapt +=1\n", " continue\n", " if x in to_be_adapted:\n", " cadapt +=1\n", " continue\n", " if not first:\n", " cremaining += 1\n", " continue\n", " data = collections.defaultdict(lambda: collections.defaultdict(lambda: []))\n", " for cons in ('>', 'H', 'X', '<', 'W', '#'):\n", " for (p, pat) in enumerate(('{} {}', '{}-{}', '{}{}')):\n", " heb = pat.format(cons + x, 'B')\n", " comps = heb.split(' ')\n", " plain = ''\n", " spanned = ''\n", " aspanned = ''\n", " first = True\n", " sep = ''\n", " for comp in comps:\n", " plain += sep + ''.join(htrans(word) for word in comp.split('-'))\n", " spanned += sep + ''.join(''.format(word) + htrans(word) + '' 
for word in comp.split('-'))\n", " aspanned += sep + ''.join(''.format(word) + htrans(word) + ' ' for word in comp.split('-'))\n", " if sep == '': sep = ' '\n", " if p != 1:\n", " data[2][p].append((heb, plain))\n", " data[1][p].append((heb, spanned))\n", " data[0][p].append((heb, aspanned))\n", " for k in sorted(data):\n", " for p in sorted(data[k]):\n", " for (heb, text) in data[k][p]:\n", " for (f, fnt) in enumerate(sorted(font)):\n", " para = '''\n", "\n", "\n", "\n", "\n", " '''.format(\n", " kcolor[k][f], \n", " x.replace('&', '&'), klegenda[k], plegenda[p], \n", " heb.replace('&','&').replace('<','<').replace('>','>'), \n", " fnt,\n", " font[fnt],\n", " text,\n", " )\n", " hfile.write(para)\n", " cdone = x\n", " first = False\n", "print('''\n", "Skipped = {:>3}\n", "To be adapted = {:>3}\n", "Not to be adapted = {:>3}\n", "Done = '{}'\n", "Remaining = {:>3}\n", "'''.format(cskip, cadapt, cnotadapt, cdone, cremaining)\n", ") \n", "\n", "\n", "examples = (\n", " 'HA-M.A33JIm03 >:ACER03',\n", " 'XA M.A33JIm03 >:ACER03',\n", " 'XA-M.A33JIm03 >:ACER03',\n", " 'XAM.A33JIm03 >:ACER03',\n", " 'XA B',\n", " '', '#'}: continue\n", " hebadapted(x, 'HA-M.A33JIm{} >:ACER03'.format(x))\n", " \n", "for e in examples:\n", " hebnormal(e)\n", "'''\n", "\n", "hfile.write('''\n", "
{}{}{}{}{}{}
\n", "\n", "\n", "''')\n", "hfile.close()\n" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Skipped = 34\n", "To be adapted = 48\n", "Not to be adapted = 8\n", "Done = ''\n", "Remaining = 0\n", "\n" ] } ], "prompt_number": 138 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }