| Name | \n", "# of nodes | \n", "# slots / node | \n", "% coverage | \n", "
|---|---|---|---|
| book | \n", "5 | \n", "79878.40 | \n", "100 | \n", "
| chapter | \n", "187 | \n", "2135.79 | \n", "100 | \n", "
| verse | \n", "5841 | \n", "68.38 | \n", "100 | \n", "
| word | \n", "114890 | \n", "3.48 | \n", "100 | \n", "
| sign | \n", "399392 | \n", "1.00 | \n", "100 | \n", "
3DT-UCPH/spC:/Users/tonyj/text-fabric-data/github/DT-UCPH/sp/appg0c9b2fff6448228af93ed6c466ba95e6c0bb3547''layoutRichorig{docRoot}/bhsa''''https://etcbc.github.io0_home{}TruelocalC:/Users/tonyj/text-fabric-data/github/DT-UCPH/sp/_tempThe Samaritan PentateuchDT-UCPH/tfsp3.4v3.4{verse}{verse}Truelex}hbo| Name | \n", "# of nodes | \n", "# slots / node | \n", "% coverage | \n", "
|---|---|---|---|
| book | \n", "39 | \n", "10938.21 | \n", "100 | \n", "
| chapter | \n", "929 | \n", "459.19 | \n", "100 | \n", "
| lex | \n", "9230 | \n", "46.22 | \n", "100 | \n", "
| verse | \n", "23213 | \n", "18.38 | \n", "100 | \n", "
| half_verse | \n", "45179 | \n", "9.44 | \n", "100 | \n", "
| sentence | \n", "63717 | \n", "6.70 | \n", "100 | \n", "
| sentence_atom | \n", "64514 | \n", "6.61 | \n", "100 | \n", "
| clause | \n", "88131 | \n", "4.84 | \n", "100 | \n", "
| clause_atom | \n", "90704 | \n", "4.70 | \n", "100 | \n", "
| phrase | \n", "253203 | \n", "1.68 | \n", "100 | \n", "
| phrase_atom | \n", "267532 | \n", "1.59 | \n", "100 | \n", "
| subphrase | \n", "113850 | \n", "1.42 | \n", "38 | \n", "
| word | \n", "426590 | \n", "1.00 | \n", "100 | \n", "
3etcbc/bhsaC:/Users/tonyj/text-fabric-data/github/etcbc/bhsa/appgd905e3fb6e80d0fa537600337614adc2af157309''<code>Genesis 1:1</code> (use <a href=\"https://github.com/{org}/{repo}/blob/master/tf/{version}/book%40en.tf\" target=\"_blank\">English book names</a>)g_uvf_utf8g_vbskq_hybridlanguageISOg_nmelex0is_rootg_vbs_utf8g_uvfdistrootsuffix_persong_vbedist_unitsuffix_numberdistributional_parentkq_hybrid_utf8crossrefSETinstructiong_prslexeme_countrank_occg_pfm_utf8freq_occcrossrefLCSfunctional_parentg_pfmg_nme_utf8g_vbe_utf8kindg_prs_utf8suffix_gendermother_object_typenoneunknownNA{docRoot}/{repo}''''https://{org}.github.io0_home{}TruelocalC:/Users/tonyj/text-fabric-data/github/etcbc/bhsa/_tempBHSA = Biblia Hebraica Stuttgartensia Amstelodamensis10.5281/zenodo.1007624Phonetic Transcriptionshttps://nbviewer.jupyter.org/github/etcbc/phono/blob/master/programs/phono.ipynb10.5281/zenodo.1007636etcbc/tfphonoParallel Passageshttps://nbviewer.jupyter.org/github/etcbc/parallels/blob/master/programs/parallels.ipynb10.5281/zenodo.1007642etcbc/tfparallelsetcbc/tfbhsa2021https://shebanq.ancient-data.org/hebrewShow this on SHEBANQlaTrue{webBase}/text?book=<1>&chapter=<2>&verse=<3>&version={version}&mr=m&qw=q&tp=txt_p&tr=hb&wget=v&qget=v&nget=vt{webBase}/word?version={version}&id=<lid>v1.8{typ} {rela}''True{code}1''True{label}''Truegloss{voc_lex_utf8}wordorig{voc_lex_utf8}{typ} {function}''True{typ} {rela}1''{number}''True{number}1''True{number}''pdp vs vtlex:glosshboData generated by `delta_mt_and_sp.ipynb` at `github.com/tonyjurg/Parashot`
`'" ] }, { "cell_type": "code", "execution_count": 4, "id": "06e216e5-a5b1-47d6-994c-f4e8c6a66e90", "metadata": {}, "outputs": [ { "ename": "TypeError", "evalue": "'<=' not supported between instances of 'NoneType' and 'int'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[1;32mIn[4], line 17\u001b[0m\n\u001b[0;32m 14\u001b[0m verseTexts[verseName] \u001b[38;5;241m=\u001b[39m verseText\u001b[38;5;241m.\u001b[39mstrip()\n\u001b[0;32m 15\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m verseTexts\n\u001b[1;32m---> 17\u001b[0m SPverses \u001b[38;5;241m=\u001b[39m \u001b[43mreconstructVerses\u001b[49m\u001b[43m(\u001b[49m\u001b[43mFsp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mLsp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mTsp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mg_cons\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbookChapterVerseList\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 18\u001b[0m MTverses \u001b[38;5;241m=\u001b[39m reconstructVerses(Fmt, Lmt, Tmt, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mg_cons\u001b[39m\u001b[38;5;124m'\u001b[39m, bookChapterVerseList)\n", "Cell \u001b[1;32mIn[4], line 8\u001b[0m, in \u001b[0;36mreconstructVerses\u001b[1;34m(F, L, T, textFeature, inputList)\u001b[0m\n\u001b[0;32m 6\u001b[0m verseText \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 7\u001b[0m verseNode \u001b[38;5;241m=\u001b[39m T\u001b[38;5;241m.\u001b[39mnodeFromSection(verseName)\n\u001b[1;32m----> 8\u001b[0m wordNodes \u001b[38;5;241m=\u001b[39m \u001b[43mL\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43md\u001b[49m\u001b[43m(\u001b[49m\u001b[43mverseNode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mword\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 9\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m wordNode \u001b[38;5;129;01min\u001b[39;00m wordNodes:\n\u001b[0;32m 10\u001b[0m wordText \u001b[38;5;241m=\u001b[39m \u001b[38;5;28meval\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mF.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtextFeature\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.v(wordNode)\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", "File \u001b[1;32m~\\anaconda3\\envs\\Text-Fabric\\Lib\\site-packages\\tf\\core\\locality.py:182\u001b[0m, in \u001b[0;36mLocality.d\u001b[1;34m(self, n, otype)\u001b[0m\n\u001b[0;32m 180\u001b[0m fOtype \u001b[38;5;241m=\u001b[39m Fotype\u001b[38;5;241m.\u001b[39mv\n\u001b[0;32m 181\u001b[0m maxSlot \u001b[38;5;241m=\u001b[39m Fotype\u001b[38;5;241m.\u001b[39mmaxSlot\n\u001b[1;32m--> 182\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mn\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m<\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mmaxSlot\u001b[49m:\n\u001b[0;32m 183\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtuple\u001b[39m()\n\u001b[0;32m 184\u001b[0m maxNode \u001b[38;5;241m=\u001b[39m Fotype\u001b[38;5;241m.\u001b[39mmaxNode\n", "\u001b[1;31mTypeError\u001b[0m: '<=' not supported between instances of 'NoneType' and 'int'" ] } ], "source": [ "# Function to reconstruct verses\n", "def reconstructVerses(F, L, T, textFeature, inputList):\n", " \"\"\"Reconstruct text for each verse.\"\"\"\n", " verseTexts = {}\n", " for verseName in inputList:\n", " verseText = ''\n", " verseNode = T.nodeFromSection(verseName)\n", " wordNodes = L.d(verseNode, 'word')\n", " for wordNode in wordNodes:\n", " wordText = eval(f'F.{textFeature}.v(wordNode)')\n", " trailer = F.trailer.v(wordNode)\n", " if wordText:\n", " verseText += wordText + (trailer if trailer else ' 
')\n", " verseTexts[verseName] = verseText.strip()\n", " return verseTexts\n", " \n", "SPverses = reconstructVerses(Fsp, Lsp, Tsp, 'g_cons', bookChapterVerseList)\n", "MTverses = reconstructVerses(Fmt, Lmt, Tmt, 'g_cons', bookChapterVerseList)" ] }, { "cell_type": "code", "execution_count": null, "id": "492e7cbe-2b84-48a9-a44c-6b6d051b0c5d", "metadata": {}, "outputs": [], "source": [ "from difflib import SequenceMatcher\n", "from IPython.display import HTML, display\n", "\n", "def highlightMatches(baseText, comparisonText):\n", " matcher = SequenceMatcher(None, baseText, comparisonText)\n", " highlightedComparisonText = \"\" \n", " for tag, i1, i2, j1, j2 in matcher.get_opcodes():\n", " if tag == \"equal\": # Identical parts\n", " highlightedComparisonText += comparisonText[j1:j2]\n", " else: # Non-matching parts\n", " highlightedComparisonText += f'{comparisonText[j1:j2]}' \n", " return highlightedComparisonText\n", "\n", "def cleanText(text):\n", " replacements = [\n", " # for the transcoded strings\n", " ('00_P', ''), # Remove '00_P'\n", " ('00_S', ''), # Remove '00_S'\n", " ('00', ''), # Remove '00'\n", " ('&', ' '), # Replace '&' with a space\n", " # for the Hebrew strings\n", " ('ס ', ''), # Final Samekh\n", " ('פ ', ''), # Final Pe\n", " ('׃', ''), # End of verse\n", " ('־',' ') # maqaf\n", " ]\n", " # Apply each replacement\n", " for old, new in replacements:\n", " text = text.replace(old, new)\n", " return text\n", "\n", "# Function to format and highlight verse differences between MT and SP\n", "def formatAndHighlight(label, MTverseText, SPverseText):\n", " book, chapter, verse = label\n", " MTverseNode = Tmt.nodeFromSection(label)\n", " MTtext = cleanText(Tmt.text(MTverseNode, \"text-orig-plain\"))\n", " SPverseNode = Tsp.nodeFromSection(label)\n", " SPtext = Tsp.text(SPverseNode)\n", " SPmarkedText = highlightMatches(MTtext, SPtext)\n", " MTmarkedText = highlightMatches(SPtext, MTtext)\n", " formattedDiff = (\n", " f'SP: {SPmarkedText}
MT: {MTmarkedText}
Levenshtein Distance: {levDistance}
' # Add the distance\n", " MT.dm(formattedDiff)\n", " htmlContent += formattedDiff # Append to the HTML content\n", "\n", "# Save the content to an HTML file\n", "fileName = f\"levenshtein_differences_MT_SP({parashaNameEnglish.replace(' ','%20')}).html\"\n", "with open(fileName, \"w\", encoding=\"utf-8\") as file:\n", " file.write(htmlContent)\n", "\n", "# wrap html header and footer and display a download button\n", "htmlContentFull = f'{htmlStart}{htmlContent}{htmlFooter}'\n", "downloadButton = f\"\"\"\n", "', '>').replace('\"', '"').replace(\"'\", ''')}\" target=\"_blank\">\n", " \n", "\n", "\"\"\"\n", "display(HTML(downloadButton))" ] }, { "cell_type": "markdown", "id": "63886933", "metadata": {}, "source": [ "# 5 - Comparison of spelling of proper nouns between SP and MT\n", "##### [Back to ToC](#TOC)\n", "\n", "This section focuses on comparing the spelling of proper nouns between the Samaritan Pentateuch (SP) and the Masoretic Text (MT). Proper nouns, including names of people, places, and unique terms, often exhibit variations in spelling between the two traditions." ] }, { "cell_type": "code", "execution_count": null, "id": "a4011f7c", "metadata": {}, "outputs": [], "source": [ "import collections\n", "\n", "def collectProperNounSpellings(F, L, T, inputList):\n", " \"\"\"\n", " Collect proper noun spellings and their associated word node numbers.\n", " Ensures only one tuple is stored for each lexeme-to-spelling mapping.\n", " \"\"\"\n", " properNounsSpellings = {}\n", " for bookChapterVerse in inputList:\n", " verseNode = T.nodeFromSection(bookChapterVerse)\n", " wordNodes = L.d(verseNode, 'word')\n", " for wordNode in wordNodes:\n", " if F.sp.v(wordNode) == 'nmpr': # Check if the word is a proper noun\n", " lex = F.lex.v(wordNode) # Lexical form\n", " spelling = F.g_cons.v(wordNode) # Spelling\n", " # Store only the first occurrence for each lex-to-cons mapping\n", " if lex not in properNounsSpellings or spelling not in {item[0] for item in properNounsSpellings[lex]}:\n", " 
properNounsSpellings.setdefault(lex, []).append((spelling, wordNode))\n", " return properNounsSpellings\n", " \n", "SPspellingDict = collectProperNounSpellings(Fsp, Lsp, Tsp, bookChapterVerseList) \n", "MTspellingDict = collectProperNounSpellings(Fmt, Lmt, Tmt, bookChapterVerseList)" ] }, { "cell_type": "code", "execution_count": null, "id": "4f596918-087e-4658-bc57-0ffc85779cb7", "metadata": {}, "outputs": [], "source": [ "from IPython.display import HTML, display\n", "\n", "# Initialize HTML content\n", "htmlContent = f'| Author | \n", "Tony Jurg | \n", "
| Version | \n", "1.1 | \n", "
| Date | \n", "5 March 2025 | \n", "