{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "fa2af17d-fae2-49ee-8a21-55b09b0b709e", "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "id": "8aef098f-3d39-4394-9fdd-b79ab97d751e", "metadata": {}, "outputs": [], "source": [ "from tf.app import use" ] }, { "cell_type": "code", "execution_count": 3, "id": "46fc48bd-e9dc-444f-b797-9ef9d2d9fe70", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "app: ~/github/ETCBC/bhsa/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/text-fabric-data/github/ETCBC/bhsa/tf/2021" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/text-fabric-data/github/ETCBC/phono/tf/2021" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/text-fabric-data/github/ETCBC/parallels/tf/2021" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/text-fabric-data/github/ETCBC/bhsa/ner" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " TF: TF API 12.5.2, ETCBC/bhsa/app v3, Search Reference
\n", " Data: ETCBC - bhsa 2021, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
book3910938.21100
chapter929459.19100
lex923046.22100
verse2321318.38100
half_verse451799.44100
sentence637176.70100
sentence_atom645146.61100
clause881314.84100
clause_atom907044.70100
phrase2532031.68100
phrase_atom2675321.59100
subphrase1138501.4238
word4265901.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
Parallel Passages\n", "
\n", "\n", "
\n", "
\n", "crossref\n", "
\n", "
int
\n", "\n", " 🆗 links between similar passages\n", "\n", "
\n", "\n", "
\n", "
\n", "\n", "
BHSA = Biblia Hebraica Stuttgartensia Amstelodamensis\n", "
\n", "\n", "
\n", "
\n", "book\n", "
\n", "
str
\n", "\n", " ✅ book name in Latin (Genesis; Numeri; Reges1; ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "book@ll\n", "
\n", "
str
\n", "\n", " ✅ book name in amharic (ኣማርኛ)\n", "\n", "
\n", "\n", "
\n", "
\n", "chapter\n", "
\n", "
int
\n", "\n", " ✅ chapter number (1; 2; 3; ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "code\n", "
\n", "
int
\n", "\n", " ✅ identifier of a clause atom relationship (0; 74; 367; ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "det\n", "
\n", "
str
\n", "\n", " ✅ determinedness of phrase(atom) (det; und; NA.)\n", "\n", "
\n", "\n", "
\n", "
\n", "domain\n", "
\n", "
str
\n", "\n", " ✅ text type of clause (? (Unknown); N (narrative); D (discursive); Q (Quotation).)\n", "\n", "
\n", "\n", "
\n", "
\n", "freq_lex\n", "
\n", "
int
\n", "\n", " ✅ frequency of lexemes\n", "\n", "
\n", "\n", "
\n", "
\n", "function\n", "
\n", "
str
\n", "\n", " ✅ syntactic function of phrase (Cmpl; Objc; Pred; ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "g_cons\n", "
\n", "
str
\n", "\n", " ✅ word consonantal-transliterated (B R>CJT BR> >LHJM ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "g_cons_utf8\n", "
\n", "
str
\n", "\n", " ✅ word consonantal-Hebrew (ב ראשׁית ברא אלהים)\n", "\n", "
\n", "\n", "
\n", "
\n", "g_lex\n", "
\n", "
str
\n", "\n", " ✅ lexeme pointed-transliterated (B.:- R;>CIJT B.@R@> >:ELOH ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "g_lex_utf8\n", "
\n", "
str
\n", "\n", " ✅ lexeme pointed-Hebrew (בְּ רֵאשִׁית בָּרָא אֱלֹה)\n", "\n", "
\n", "\n", "
\n", "
\n", "g_word\n", "
\n", "
str
\n", "\n", " ✅ word pointed-transliterated (B.:- R;>CI73JT B.@R@74> >:ELOHI92JM)\n", "\n", "
\n", "\n", "
\n", "
\n", "g_word_utf8\n", "
\n", "
str
\n", "\n", " ✅ word pointed-Hebrew (בְּ רֵאשִׁ֖ית בָּרָ֣א אֱלֹהִ֑ים)\n", "\n", "
\n", "\n", "
\n", "
\n", "gloss\n", "
\n", "
str
\n", "\n", " 🆗 english translation of lexeme (beginning create god(s))\n", "\n", "
\n", "\n", "
\n", "
\n", "gn\n", "
\n", "
str
\n", "\n", " ✅ grammatical gender (m; f; NA; unknown.)\n", "\n", "
\n", "\n", "
\n", "
\n", "label\n", "
\n", "
str
\n", "\n", " ✅ (half-)verse label (half verses: A; B; C; verses: GEN 01,02)\n", "\n", "
\n", "\n", "
\n", "
\n", "language\n", "
\n", "
str
\n", "\n", " ✅ of word or lexeme (Hebrew; Aramaic.)\n", "\n", "
\n", "\n", "
\n", "
\n", "lex\n", "
\n", "
str
\n", "\n", " ✅ lexeme consonantal-transliterated (B R>CJT/ BR>[ >LHJM/)\n", "\n", "
\n", "\n", "
\n", "
\n", "lex_utf8\n", "
\n", "
str
\n", "\n", " ✅ lexeme consonantal-Hebrew (ב ראשׁית֜ ברא אלהים֜)\n", "\n", "
\n", "\n", "
\n", "
\n", "ls\n", "
\n", "
str
\n", "\n", " ✅ lexical set, subclassification of part-of-speech (card; ques; mult)\n", "\n", "
\n", "\n", "
\n", "
\n", "nametype\n", "
\n", "
str
\n", "\n", " ⚠️ named entity type (pers; mens; gens; topo; ppde.)\n", "\n", "
\n", "\n", "
\n", "
\n", "nme\n", "
\n", "
str
\n", "\n", " ✅ nominal ending consonantal-transliterated (absent; n/a; JM, ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "nu\n", "
\n", "
str
\n", "\n", " ✅ grammatical number (sg; du; pl; NA; unknown.)\n", "\n", "
\n", "\n", "
\n", "
\n", "number\n", "
\n", "
int
\n", "\n", " ✅ sequence number of an object within its context\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "pargr\n", "
\n", "
str
\n", "\n", " 🆗 hierarchical paragraph number (1; 1.2; 1.2.3.4; ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "pdp\n", "
\n", "
str
\n", "\n", " ✅ phrase dependent part-of-speech (art; verb; subs; nmpr, ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "pfm\n", "
\n", "
str
\n", "\n", " ✅ preformative consonantal-transliterated (absent; n/a; J, ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "prs\n", "
\n", "
str
\n", "\n", " ✅ pronominal suffix consonantal-transliterated (absent; n/a; W; ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "prs_gn\n", "
\n", "
str
\n", "\n", " ✅ pronominal suffix gender (m; f; NA; unknown.)\n", "\n", "
\n", "\n", "
\n", "
\n", "prs_nu\n", "
\n", "
str
\n", "\n", " ✅ pronominal suffix number (sg; du; pl; NA; unknown.)\n", "\n", "
\n", "\n", "
\n", "
\n", "prs_ps\n", "
\n", "
str
\n", "\n", " ✅ pronominal suffix person (p1; p2; p3; NA; unknown.)\n", "\n", "
\n", "\n", "
\n", "
\n", "ps\n", "
\n", "
str
\n", "\n", " ✅ grammatical person (p1; p2; p3; NA; unknown.)\n", "\n", "
\n", "\n", "
\n", "
\n", "qere\n", "
\n", "
str
\n", "\n", " ✅ word pointed-transliterated masoretic reading correction\n", "\n", "
\n", "\n", "
\n", "
\n", "qere_trailer\n", "
\n", "
str
\n", "\n", " ✅ interword material -pointed-transliterated (Masoretic correction)\n", "\n", "
\n", "\n", "
\n", "
\n", "qere_trailer_utf8\n", "
\n", "
str
\n", "\n", " ✅ interword material -pointed-transliterated (Masoretic correction)\n", "\n", "
\n", "\n", "
\n", "
\n", "qere_utf8\n", "
\n", "
str
\n", "\n", " ✅ word pointed-Hebrew masoretic reading correction\n", "\n", "
\n", "\n", "
\n", "
\n", "rank_lex\n", "
\n", "
int
\n", "\n", " ✅ ranking of lexemes based on freqnuecy\n", "\n", "
\n", "\n", "
\n", "
\n", "rela\n", "
\n", "
str
\n", "\n", " ✅ linguistic relation between clause/(sub)phrase(atom) (ADJ; MOD; ATR; ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "sp\n", "
\n", "
str
\n", "\n", " ✅ part-of-speech (art; verb; subs; nmpr, ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "st\n", "
\n", "
str
\n", "\n", " ✅ state of a noun (a (absolute); c (construct); e (emphatic).)\n", "\n", "
\n", "\n", "
\n", "
\n", "tab\n", "
\n", "
int
\n", "\n", " ✅ clause atom: its level in the linguistic embedding\n", "\n", "
\n", "\n", "
\n", "
\n", "trailer\n", "
\n", "
str
\n", "\n", " ✅ interword material pointed-transliterated (& 00 05 00_P ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "trailer_utf8\n", "
\n", "
str
\n", "\n", " ✅ interword material pointed-Hebrew (־ ׃)\n", "\n", "
\n", "\n", "
\n", "
\n", "txt\n", "
\n", "
str
\n", "\n", " ✅ text type of clause and surrounding (repetion of ? N D Q as in feature domain)\n", "\n", "
\n", "\n", "
\n", "
\n", "typ\n", "
\n", "
str
\n", "\n", " ✅ clause/phrase(atom) type (VP; NP; Ellp; Ptcp; WayX)\n", "\n", "
\n", "\n", "
\n", "
\n", "uvf\n", "
\n", "
str
\n", "\n", " ✅ univalent final consonant consonantal-transliterated (absent; N; J; ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "vbe\n", "
\n", "
str
\n", "\n", " ✅ verbal ending consonantal-transliterated (n/a; W; ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "vbs\n", "
\n", "
str
\n", "\n", " ✅ root formation consonantal-transliterated (absent; n/a; H; ...)\n", "\n", "
\n", "\n", "
\n", "
\n", "verse\n", "
\n", "
int
\n", "\n", " ✅ verse number\n", "\n", "
\n", "\n", "
\n", "
\n", "voc_lex\n", "
\n", "
str
\n", "\n", " ✅ vocalized lexeme pointed-transliterated (B.: R;>CIJT BR> >:ELOHIJM)\n", "\n", "
\n", "\n", "
\n", "
\n", "voc_lex_utf8\n", "
\n", "
str
\n", "\n", " ✅ vocalized lexeme pointed-Hebrew (בְּ רֵאשִׁית ברא אֱלֹהִים)\n", "\n", "
\n", "\n", "
\n", "
\n", "vs\n", "
\n", "
str
\n", "\n", " ✅ verbal stem (qal; piel; hif; apel; pael)\n", "\n", "
\n", "\n", "
\n", "
\n", "vt\n", "
\n", "
str
\n", "\n", " ✅ verbal tense (perf; impv; wayq; infc)\n", "\n", "
\n", "\n", "
\n", "
\n", "mother\n", "
\n", "
none
\n", "\n", " ✅ linguistic dependency between textual objects\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "\n", "
Phonetic Transcriptions\n", "
\n", "\n", "
\n", "
\n", "phono\n", "
\n", "
str
\n", "\n", " 🆗 phonological transcription (bᵊ rēšˌîṯ bārˈā ʔᵉlōhˈîm)\n", "\n", "
\n", "\n", "
\n", "
\n", "phono_trailer\n", "
\n", "
str
\n", "\n", " 🆗 interword material in phonological transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: ETCBC/bhsa
  3. appPath: /Users/me/github/ETCBC/bhsa/app
  4. commit: no value
  5. css: ''
  6. dataDisplay:
    • exampleSectionHtml:<code>Genesis 1:1</code> (use <a href=\"https://github.com/{org}/{repo}/blob/master/tf/{version}/book%40en.tf\" target=\"_blank\">English book names</a>)
    • excludedFeatures:
      • g_uvf_utf8
      • g_vbs
      • kq_hybrid
      • languageISO
      • g_nme
      • lex0
      • is_root
      • g_vbs_utf8
      • g_uvf
      • dist
      • root
      • suffix_person
      • g_vbe
      • dist_unit
      • suffix_number
      • distributional_parent
      • kq_hybrid_utf8
      • crossrefSET
      • instruction
      • g_prs
      • lexeme_count
      • rank_occ
      • g_pfm_utf8
      • freq_occ
      • crossrefLCS
      • functional_parent
      • g_pfm
      • g_nme_utf8
      • g_vbe_utf8
      • kind
      • g_prs_utf8
      • suffix_gender
      • mother_object_type
    • noneValues:
      • absent
      • n/a
      • none
      • unknown
      • no value
      • NA
  7. docs:
    • docBase: {docRoot}/{repo}
    • docExt: ''
    • docPage: ''
    • docRoot: https://{org}.github.io
    • featurePage: 0_home
  8. interfaceDefaults: {}
  9. isCompatible: True
  10. local: clone
  11. localDir: /Users/me/github/ETCBC/bhsa/_temp
  12. provenanceSpec:
    • corpus: BHSA = Biblia Hebraica Stuttgartensia Amstelodamensis
    • doi: 10.5281/zenodo.1007624
    • extraData: ner
    • moduleSpecs:
      • :
        • backend: no value
        • corpus: Phonetic Transcriptions
        • docUrl:https://nbviewer.jupyter.org/github/etcbc/phono/blob/master/programs/phono.ipynb
        • doi: 10.5281/zenodo.1007636
        • org: ETCBC
        • relative: /tf
        • repo: phono
      • :
        • backend: no value
        • corpus: Parallel Passages
        • docUrl:https://nbviewer.jupyter.org/github/ETCBC/parallels/blob/master/programs/parallels.ipynb
        • doi: 10.5281/zenodo.1007642
        • org: ETCBC
        • relative: /tf
        • repo: parallels
    • org: ETCBC
    • relative: /tf
    • repo: bhsa
    • version: 2021
    • webBase: https://shebanq.ancient-data.org/hebrew
    • webHint: Show this on SHEBANQ
    • webLang: la
    • webLexId: True
    • webUrl:{webBase}/text?book=<1>&chapter=<2>&verse=<3>&version={version}&mr=m&qw=q&tp=txt_p&tr=hb&wget=v&qget=v&nget=vt
    • webUrlLex: {webBase}/word?version={version}&id=<lid>
  13. release: no value
  14. typeDisplay:
    • clause:
      • label: {typ} {rela}
      • style: ''
    • clause_atom:
      • hidden: True
      • label: {code}
      • level: 1
      • style: ''
    • half_verse:
      • hidden: True
      • label: {label}
      • style: ''
      • verselike: True
    • lex:
      • featuresBare: gloss
      • label: {voc_lex_utf8}
      • lexOcc: word
      • style: orig
      • template: {voc_lex_utf8}
    • phrase:
      • label: {typ} {function}
      • style: ''
    • phrase_atom:
      • hidden: True
      • label: {typ} {rela}
      • level: 1
      • style: ''
    • sentence:
      • label: {number}
      • style: ''
    • sentence_atom:
      • hidden: True
      • label: {number}
      • level: 1
      • style: ''
    • subphrase:
      • hidden: True
      • label: {number}
      • style: ''
    • word:
      • features: pdp vs vt
      • featuresBare: lex:gloss
  15. writing: hbo
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
TF API: names N F E L T S C TF Fs Fall Es Eall Cs Call directly usable

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "A = use(\"ETCBC/bhsa:clone\", hoist=globals())" ] }, { "cell_type": "markdown", "id": "5be3f23e-99c8-4fc4-a9b9-4fcd8ccc8168", "metadata": {}, "source": [ "# Information about features and types" ] }, { "cell_type": "code", "execution_count": 4, "id": "2b9690ce-96e9-48dc-bad4-ce9269a82d75", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " TF: TF API 12.5.2, ETCBC/bhsa/app v3, Search Reference
\n", " Data: ETCBC - bhsa 2021, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
book3910938.21100
chapter929459.19100
lex923046.22100
verse2321318.38100
half_verse451799.44100
sentence637176.70100
sentence_atom645146.61100
clause881314.84100
clause_atom907044.70100
phrase2532031.68100
phrase_atom2675321.59100
subphrase1138501.4238
word4265901.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
Parallel Passages\n", "
\n", "\n", "
\n", "
\n", "crossref\n", "
\n", "
int
\n", "\n", "
\n", " 🆗 links between similar passages\n", "
\n", "\n", "
\n", "
author:
\n", "
BHSA Data: Constantijn Sikkel; Parallels Notebook: Dirk Roorda, Martijn Naaijer
\n", "
\n", "\n", "
\n", "
coreData:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:40:46Z
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
Parallels notebook, see https://github.com/ETCBC/parallels
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "\n", "
BHSA = Biblia Hebraica Stuttgartensia Amstelodamensis\n", "
\n", "\n", "
\n", "
\n", "book\n", "
\n", "
str
\n", "\n", "
\n", " ✅ book name in Latin (Genesis; Numeri; Reges1; ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:17:55Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "book@ll\n", "
\n", "
str
\n", "\n", "
\n", " ✅ book name in amharic (ኣማርኛ)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:20:27Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
language:
\n", "
ኣማርኛ
\n", "
\n", "\n", "
\n", "
languageCode:
\n", "
am
\n", "
\n", "\n", "
\n", "
languageEnglish:
\n", "
amharic
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
book names from wikipedia and other sources
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "chapter\n", "
\n", "
int
\n", "\n", "
\n", " ✅ chapter number (1; 2; 3; ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:17:55Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "code\n", "
\n", "
int
\n", "\n", "
\n", " ✅ identifier of a clause atom relationship (0; 74; 367; ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:17:56Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "det\n", "
\n", "
str
\n", "\n", "
\n", " ✅ determinedness of phrase(atom) (det; und; NA.)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:17:56Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "domain\n", "
\n", "
str
\n", "\n", "
\n", " ✅ text type of clause (? (Unknown); N (narrative); D (discursive); Q (Quotation).)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:17:57Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "freq_lex\n", "
\n", "
int
\n", "\n", "
\n", " ✅ frequency of lexemes\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:24:45Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
computed on the basis of the ETCBC core set of features
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "function\n", "
\n", "
str
\n", "\n", "
\n", " ✅ syntactic function of phrase (Cmpl; Objc; Pred; ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:17:57Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "g_cons\n", "
\n", "
str
\n", "\n", "
\n", " ✅ word consonantal-transliterated (B R>CJT BR> >LHJM ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:17:57Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "g_cons_utf8\n", "
\n", "
str
\n", "\n", "
\n", " ✅ word consonantal-Hebrew (ב ראשׁית ברא אלהים)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:17:58Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "g_lex\n", "
\n", "
str
\n", "\n", "
\n", " ✅ lexeme pointed-transliterated (B.:- R;>CIJT B.@R@> >:ELOH ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:17:58Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "g_lex_utf8\n", "
\n", "
str
\n", "\n", "
\n", " ✅ lexeme pointed-Hebrew (בְּ רֵאשִׁית בָּרָא אֱלֹה)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:17:59Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "g_word\n", "
\n", "
str
\n", "\n", "
\n", " ✅ word pointed-transliterated (B.:- R;>CI73JT B.@R@74> >:ELOHI92JM)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:04Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "g_word_utf8\n", "
\n", "
str
\n", "\n", "
\n", " ✅ word pointed-Hebrew (בְּ רֵאשִׁ֖ית בָּרָ֣א אֱלֹהִ֑ים)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:04Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "gloss\n", "
\n", "
str
\n", "\n", "
\n", " 🆗 english translation of lexeme (beginning create god(s))\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:13Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional lexicon file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "gn\n", "
\n", "
str
\n", "\n", "
\n", " ✅ grammatical gender (m; f; NA; unknown.)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:05Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "label\n", "
\n", "
str
\n", "\n", "
\n", " ✅ (half-)verse label (half verses: A; B; C; verses: GEN 01,02)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:06Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "language\n", "
\n", "
str
\n", "\n", "
\n", " ✅ of word or lexeme (Hebrew; Aramaic.)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:13Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional lexicon file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "lex\n", "
\n", "
str
\n", "\n", "
\n", " ✅ lexeme consonantal-transliterated (B R>CJT/ BR>[ >LHJM/)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:14Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional lexicon file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "lex_utf8\n", "
\n", "
str
\n", "\n", "
\n", " ✅ lexeme consonantal-Hebrew (ב ראשׁית֜ ברא אלהים֜)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:15Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional lexicon file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "ls\n", "
\n", "
str
\n", "\n", "
\n", " ✅ lexical set, subclassification of part-of-speech (card; ques; mult)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:15Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional lexicon file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "nametype\n", "
\n", "
str
\n", "\n", "
\n", " ⚠️ named entity type (pers; mens; gens; topo; ppde.)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:15Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional lexicon file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "nme\n", "
\n", "
str
\n", "\n", "
\n", " ✅ nominal ending consonantal-transliterated (absent; n/a; JM, ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:08Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "nu\n", "
\n", "
str
\n", "\n", "
\n", " ✅ grammatical number (sg; du; pl; NA; unknown.)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:08Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "number\n", "
\n", "
int
\n", "\n", "
\n", " ✅ sequence number of an object within its context\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:09Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", "
\n", " \n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:15Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "pargr\n", "
\n", "
str
\n", "\n", "
\n", " 🆗 hierarchical paragraph number (1; 1.2; 1.2.3.4; ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:22:50Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional paragraph file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "pdp\n", "
\n", "
str
\n", "\n", "
\n", " ✅ phrase dependent part-of-speech (art; verb; subs; nmpr, ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:10Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "pfm\n", "
\n", "
str
\n", "\n", "
\n", " ✅ preformative consonantal-transliterated (absent; n/a; J, ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:11Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "prs\n", "
\n", "
str
\n", "\n", "
\n", " ✅ pronominal suffix consonantal-transliterated (absent; n/a; W; ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:11Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "prs_gn\n", "
\n", "
str
\n", "\n", "
\n", " ✅ pronominal suffix gender (m; f; NA; unknown.)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:11Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "prs_nu\n", "
\n", "
str
\n", "\n", "
\n", " ✅ pronominal suffix number (sg; du; pl; NA; unknown.)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:12Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "prs_ps\n", "
\n", "
str
\n", "\n", "
\n", " ✅ pronominal suffix person (p1; p2; p3; NA; unknown.)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:12Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "ps\n", "
\n", "
str
\n", "\n", "
\n", " ✅ grammatical person (p1; p2; p3; NA; unknown.)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:12Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "qere\n", "
\n", "
str
\n", "\n", "
\n", " ✅ word pointed-transliterated masoretic reading correction\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:23:29Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional ketiv/qere file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "qere_trailer\n", "
\n", "
str
\n", "\n", "
\n", " ✅ interword material -pointed-transliterated (Masoretic correction)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:23:29Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional ketiv/qere file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "qere_trailer_utf8\n", "
\n", "
str
\n", "\n", "
\n", " ✅ interword material -pointed-transliterated (Masoretic correction)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:23:29Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional ketiv/qere file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "qere_utf8\n", "
\n", "
str
\n", "\n", "
\n", " ✅ word pointed-Hebrew masoretic reading correction\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:23:29Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional ketiv/qere file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "rank_lex\n", "
\n", "
int
\n", "\n", "
\n", " ✅ ranking of lexemes based on freqnuecy\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:24:46Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
computed on the basis of the ETCBC core set of features
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "rela\n", "
\n", "
str
\n", "\n", "
\n", " ✅ linguistic relation between clause/(sub)phrase(atom) (ADJ; MOD; ATR; ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:13Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "sp\n", "
\n", "
str
\n", "\n", "
\n", " ✅ part-of-speech (art; verb; subs; nmpr, ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:16Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional lexicon file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "st\n", "
\n", "
str
\n", "\n", "
\n", " ✅ state of a noun (a (absolute); c (construct); e (emphatic).)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:14Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "tab\n", "
\n", "
int
\n", "\n", "
\n", " ✅ clause atom: its level in the linguistic embedding\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:16Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "trailer\n", "
\n", "
str
\n", "\n", "
\n", " ✅ interword material pointed-transliterated (& 00 05 00_P ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:01Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "trailer_utf8\n", "
\n", "
str
\n", "\n", "
\n", " ✅ interword material pointed-Hebrew (־ ׃)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:01Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "txt\n", "
\n", "
str
\n", "\n", "
\n", " ✅ text type of clause and surrounding (repetion of ? N D Q as in feature domain)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:16Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "typ\n", "
\n", "
str
\n", "\n", "
\n", " ✅ clause/phrase(atom) type (VP; NP; Ellp; Ptcp; WayX)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:16Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "uvf\n", "
\n", "
str
\n", "\n", "
\n", " ✅ univalent final consonant consonantal-transliterated (absent; N; J; ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:17Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "vbe\n", "
\n", "
str
\n", "\n", "
\n", " ✅ verbal ending consonantal-transliterated (n/a; W; ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:17Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "vbs\n", "
\n", "
str
\n", "\n", "
\n", " ✅ root formation consonantal-transliterated (absent; n/a; H; ...)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:17Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "verse\n", "
\n", "
int
\n", "\n", "
\n", " ✅ verse number\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:18Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "voc_lex\n", "
\n", "
str
\n", "\n", "
\n", " ✅ vocalized lexeme pointed-transliterated (B.: R;>CIJT BR> >:ELOHIJM)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:16Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional lexicon file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "voc_lex_utf8\n", "
\n", "
str
\n", "\n", "
\n", " ✅ vocalized lexeme pointed-Hebrew (בְּ רֵאשִׁית ברא אֱלֹהִים)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:17Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
from additional lexicon file provided by the ETCBC
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "vs\n", "
\n", "
str
\n", "\n", "
\n", " ✅ verbal stem (qal; piel; hif; apel; pael)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:18Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "vt\n", "
\n", "
str
\n", "\n", "
\n", " ✅ verbal tense (perf; impv; wayq; infc)\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:18Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "mother\n", "
\n", "
none
\n", "\n", "
\n", " ✅ linguistic dependency between textual objects\n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:18:22Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", "
\n", " \n", "
\n", "\n", "
\n", "
author:
\n", "
Eep Talstra Centre for Bible and Computer
\n", "
\n", "\n", "
\n", "
dataset:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
datasetName:
\n", "
Biblia Hebraica Stuttgartensia Amstelodamensis
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:21:17Z
\n", "
\n", "\n", "
\n", "
email:
\n", "
shebanq@ancient-data.org
\n", "
\n", "\n", "
\n", "
encoders:
\n", "
Constantijn Sikkel (QDF), Ulrik Petersen (MQL) and Dirk Roorda (TF)
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
website:
\n", "
https://shebanq.ancient-data.org
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "\n", "
Phonetic Transcriptions\n", "
\n", "\n", "
\n", "
\n", "phono\n", "
\n", "
str
\n", "\n", "
\n", " 🆗 phonological transcription (bᵊ rēšˌîṯ bārˈā ʔᵉlōhˈîm)\n", "
\n", "\n", "
\n", "
author:
\n", "
BHSA Data: Constantijn Sikkel; Phono Notebook: Dirk Roorda
\n", "
\n", "\n", "
\n", "
coreData:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:25:55Z
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
computed by the phono notebook, see https://github.com/ETCBC/phono
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "phono_trailer\n", "
\n", "
str
\n", "\n", "
\n", " 🆗 interword material in phonological transcription\n", "
\n", "\n", "
\n", "
author:
\n", "
BHSA Data: Constantijn Sikkel; Phono Notebook: Dirk Roorda
\n", "
\n", "\n", "
\n", "
coreData:
\n", "
BHSA
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-09T14:25:55Z
\n", "
\n", "\n", "
\n", "
provenance:
\n", "
computed by the phono notebook, see https://github.com/ETCBC/phono
\n", "
\n", "\n", "
\n", "
version:
\n", "
2021
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: ETCBC/bhsa
  3. appPath: /Users/me/github/ETCBC/bhsa/app
  4. commit: no value
  5. css: ''
  6. dataDisplay:
    • exampleSectionHtml:<code>Genesis 1:1</code> (use <a href=\"https://github.com/{org}/{repo}/blob/master/tf/{version}/book%40en.tf\" target=\"_blank\">English book names</a>)
    • excludedFeatures:
      • g_uvf_utf8
      • g_vbs
      • kq_hybrid
      • languageISO
      • g_nme
      • lex0
      • is_root
      • g_vbs_utf8
      • g_uvf
      • dist
      • root
      • suffix_person
      • g_vbe
      • dist_unit
      • suffix_number
      • distributional_parent
      • kq_hybrid_utf8
      • crossrefSET
      • instruction
      • g_prs
      • lexeme_count
      • rank_occ
      • g_pfm_utf8
      • freq_occ
      • crossrefLCS
      • functional_parent
      • g_pfm
      • g_nme_utf8
      • g_vbe_utf8
      • kind
      • g_prs_utf8
      • suffix_gender
      • mother_object_type
    • noneValues:
      • absent
      • n/a
      • none
      • unknown
      • no value
      • NA
  7. docs:
    • docBase: {docRoot}/{repo}
    • docExt: ''
    • docPage: ''
    • docRoot: https://{org}.github.io
    • featurePage: 0_home
  8. interfaceDefaults: {}
  9. isCompatible: True
  10. local: clone
  11. localDir: /Users/me/github/ETCBC/bhsa/_temp
  12. provenanceSpec:
    • corpus: BHSA = Biblia Hebraica Stuttgartensia Amstelodamensis
    • doi: 10.5281/zenodo.1007624
    • extraData: ner
    • moduleSpecs:
      • :
        • backend: no value
        • corpus: Phonetic Transcriptions
        • docUrl:https://nbviewer.jupyter.org/github/etcbc/phono/blob/master/programs/phono.ipynb
        • doi: 10.5281/zenodo.1007636
        • org: ETCBC
        • relative: /tf
        • repo: phono
      • :
        • backend: no value
        • corpus: Parallel Passages
        • docUrl:https://nbviewer.jupyter.org/github/ETCBC/parallels/blob/master/programs/parallels.ipynb
        • doi: 10.5281/zenodo.1007642
        • org: ETCBC
        • relative: /tf
        • repo: parallels
    • org: ETCBC
    • relative: /tf
    • repo: bhsa
    • version: 2021
    • webBase: https://shebanq.ancient-data.org/hebrew
    • webHint: Show this on SHEBANQ
    • webLang: la
    • webLexId: True
    • webUrl:{webBase}/text?book=<1>&chapter=<2>&verse=<3>&version={version}&mr=m&qw=q&tp=txt_p&tr=hb&wget=v&qget=v&nget=vt
    • webUrlLex: {webBase}/word?version={version}&id=<lid>
  13. release: no value
  14. typeDisplay:
    • clause:
      • label: {typ} {rela}
      • style: ''
    • clause_atom:
      • hidden: True
      • label: {code}
      • level: 1
      • style: ''
    • half_verse:
      • hidden: True
      • label: {label}
      • style: ''
      • verselike: True
    • lex:
      • featuresBare: gloss
      • label: {voc_lex_utf8}
      • lexOcc: word
      • style: orig
      • template: {voc_lex_utf8}
    • phrase:
      • label: {typ} {function}
      • style: ''
    • phrase_atom:
      • hidden: True
      • label: {typ} {rela}
      • level: 1
      • style: ''
    • sentence:
      • label: {number}
      • style: ''
    • sentence_atom:
      • hidden: True
      • label: {number}
      • level: 1
      • style: ''
    • subphrase:
      • hidden: True
      • label: {number}
      • style: ''
    • word:
      • features: pdp vs vt
      • featuresBare: lex:gloss
  15. writing: hbo
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "A.header(allMeta=True)" ] }, { "cell_type": "code", "execution_count": 5, "id": "c9c3f42a-4603-478b-a598-e0cec5b3b40c", "metadata": {}, "outputs": [], "source": [ "import collections" ] }, { "cell_type": "markdown", "id": "1905cd57-0544-4976-b616-9f35a9eb6491", "metadata": {}, "source": [ "The following snippet is thanks to Marek Polášek:" ] }, { "cell_type": "code", "execution_count": 6, "id": "2c669996-b47c-414c-aa58-27f1cde9c379", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "book\n", "\tbook, chapter, verse\n", "book@am\n", "\tbook\n", "book@ar\n", "\tbook\n", "book@bn\n", "\tbook\n", "book@da\n", "\tbook\n", "book@de\n", "\tbook\n", "book@el\n", "\tbook\n", "book@en\n", "\tbook\n", "book@es\n", "\tbook\n", "book@fa\n", "\tbook\n", "book@fr\n", "\tbook\n", "book@he\n", "\tbook\n", "book@hi\n", "\tbook\n", "book@id\n", "\tbook\n", "book@ja\n", "\tbook\n", "book@ko\n", "\tbook\n", "book@la\n", "\tbook\n", "book@nl\n", "\tbook\n", "book@pa\n", "\tbook\n", "book@pt\n", "\tbook\n", "book@ru\n", "\tbook\n", "book@sw\n", "\tbook\n", "book@syc\n", "\tbook\n", "book@tr\n", "\tbook\n", "book@ur\n", "\tbook\n", "book@yo\n", "\tbook\n", "book@zh\n", "\tbook\n", "chapter\n", "\tchapter, verse\n", "code\n", "\tclause_atom\n", "det\n", "\tphrase, phrase_atom\n", "domain\n", "\tclause\n", "freq_lex\n", "\tlex, word\n", "function\n", "\tphrase\n", "g_cons\n", "\tword\n", "g_cons_utf8\n", "\tword\n", "g_lex\n", "\tword\n", "g_lex_utf8\n", "\tword\n", "g_word\n", "\tword\n", "g_word_utf8\n", "\tword\n", "gloss\n", "\tlex, word\n", "gn\n", "\tword\n", "label\n", "\tverse, half_verse\n", "language\n", "\tlex, word\n", "lex\n", "\tlex, word\n", "lex_utf8\n", "\tlex, word\n", "ls\n", "\tlex, word\n", "nametype\n", "\tlex, word\n", "nme\n", "\tword\n", "nu\n", "\tword\n", "number\n", "\tsentence, sentence_atom, clause, clause_atom, phrase, phrase_atom, 
word\n", "otype\n", "pargr\n", "\tclause_atom\n", "pdp\n", "\tword\n", "pfm\n", "\tword\n", "phono\n", "\tword\n", "phono_trailer\n", "\tword\n", "prs\n", "\tword\n", "prs_gn\n", "\tword\n", "prs_nu\n", "\tword\n", "prs_ps\n", "\tword\n", "ps\n", "\tword\n", "qere\n", "\tword\n", "qere_trailer\n", "\tword\n", "qere_trailer_utf8\n", "\tword\n", "qere_utf8\n", "\tword\n", "rank_lex\n", "\tlex, word\n", "rela\n", "\tclause, phrase, phrase_atom, subphrase\n", "sp\n", "\tlex, word\n", "st\n", "\tword\n", "tab\n", "\tclause_atom\n", "trailer\n", "\tword\n", "trailer_utf8\n", "\tword\n", "txt\n", "\tclause\n", "typ\n", "\tclause, clause_atom, phrase, phrase_atom\n", "uvf\n", "\tword\n", "vbe\n", "\tword\n", "vbs\n", "\tword\n", "verse\n", "\tverse\n", "voc_lex\n", "\tlex, word\n", "voc_lex_utf8\n", "\tlex, word\n", "vs\n", "\tword\n", "vt\n", "\tword\n", " 21s done\n" ] } ], "source": [ "A.indent(reset=True)\n", "data = collections.defaultdict(list)\n", "\n", "allTypes = F.otype.all\n", "\n", "for prop in Fall():\n", " print(prop)\n", " if prop == \"otype\":\n", " continue\n", " for t in allTypes:\n", " if len(Fs(prop).freqList({t})) > 0:\n", " data[prop].append(t)\n", "\n", " print(f\"\\t{', '.join(data[prop])}\")\n", "\n", "A.info(\"done\")" ] }, { "cell_type": "markdown", "id": "d94bcaf8-65e2-4d65-a835-dfbff93e86a6", "metadata": {}, "source": [ "Here I explore if it can be done a bit faster" ] }, { "cell_type": "code", "execution_count": 7, "id": "8f4ac506-8c15-484a-955c-e4c0eaf38684", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "book\n", "chapter\n", "lex\n", "verse\n", "half_verse\n", "sentence\n", "sentence_atom\n", "clause\n", "clause_atom\n", "phrase\n", "phrase_atom\n", "subphrase\n", "word\n", "book\n", "\tbook, chapter, verse\n", "book@am\n", "\tbook\n", "book@ar\n", "\tbook\n", "book@bn\n", "\tbook\n", "book@da\n", "\tbook\n", "book@de\n", "\tbook\n", "book@el\n", "\tbook\n", "book@en\n", "\tbook\n", "book@es\n", 
"\tbook\n", "book@fa\n", "\tbook\n", "book@fr\n", "\tbook\n", "book@he\n", "\tbook\n", "book@hi\n", "\tbook\n", "book@id\n", "\tbook\n", "book@ja\n", "\tbook\n", "book@ko\n", "\tbook\n", "book@la\n", "\tbook\n", "book@nl\n", "\tbook\n", "book@pa\n", "\tbook\n", "book@pt\n", "\tbook\n", "book@ru\n", "\tbook\n", "book@sw\n", "\tbook\n", "book@syc\n", "\tbook\n", "book@tr\n", "\tbook\n", "book@ur\n", "\tbook\n", "book@yo\n", "\tbook\n", "book@zh\n", "\tbook\n", "chapter\n", "\tchapter, verse\n", "code\n", "\tclause_atom\n", "det\n", "\tphrase, phrase_atom\n", "domain\n", "\tclause\n", "freq_lex\n", "\tlex, word\n", "function\n", "\tphrase\n", "g_cons\n", "\tword\n", "g_cons_utf8\n", "\tword\n", "g_lex\n", "\tword\n", "g_lex_utf8\n", "\tword\n", "g_word\n", "\tword\n", "g_word_utf8\n", "\tword\n", "gloss\n", "\tlex, word\n", "gn\n", "\tword\n", "label\n", "\tverse, half_verse\n", "language\n", "\tlex, word\n", "lex\n", "\tlex, word\n", "lex_utf8\n", "\tlex, word\n", "ls\n", "\tlex, word\n", "nametype\n", "\tlex, word\n", "nme\n", "\tword\n", "nu\n", "\tword\n", "number\n", "\tsentence, sentence_atom, clause, clause_atom, phrase, phrase_atom, word\n", "pargr\n", "\tclause_atom\n", "pdp\n", "\tword\n", "pfm\n", "\tword\n", "phono\n", "\tword\n", "phono_trailer\n", "\tword\n", "prs\n", "\tword\n", "prs_gn\n", "\tword\n", "prs_nu\n", "\tword\n", "prs_ps\n", "\tword\n", "ps\n", "\tword\n", "qere\n", "\tword\n", "qere_trailer\n", "\tword\n", "qere_trailer_utf8\n", "\tword\n", "qere_utf8\n", "\tword\n", "rank_lex\n", "\tlex, word\n", "rela\n", "\tclause, phrase, phrase_atom, subphrase\n", "sp\n", "\tlex, word\n", "st\n", "\tword\n", "tab\n", "\tclause_atom\n", "trailer\n", "\tword\n", "trailer_utf8\n", "\tword\n", "txt\n", "\tclause\n", "typ\n", "\tclause, clause_atom, phrase, phrase_atom\n", "uvf\n", "\tword\n", "vbe\n", "\tword\n", "vbs\n", "\tword\n", "verse\n", "\tverse\n", "voc_lex\n", "\tlex, word\n", "voc_lex_utf8\n", "\tlex, word\n", "vs\n", "\tword\n", "vt\n", 
"\tword\n", " 14s done\n" ] } ], "source": [ "A.indent(reset=True)\n", "\n", "data1 = collections.defaultdict(list)\n", "\n", "allProps = [p for p in Fall() if p != \"otype\"]\n", "\n", "for t in F.otype.all:\n", " print(t)\n", " nodes = F.otype.s(t)\n", "\n", " for prop in allProps:\n", " if any(Fs(prop).v(n) is not None for n in nodes):\n", " data1[prop].append(t)\n", "\n", "for prop in allProps:\n", " print(prop)\n", " print(f\"\\t{', '.join(data1[prop])}\")\n", "\n", "A.info(\"done\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "127a7101-2226-4b6c-a9e5-7bcac142f956", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data == data1" ] }, { "cell_type": "markdown", "id": "5a30ae92-d01f-45ae-b740-14776c309306", "metadata": {}, "source": [ "Marek responded with this algorithm:" ] }, { "cell_type": "code", "execution_count": 13, "id": "3af9ffd7-11fb-40a2-a287-ad89a1c990f1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "book\n", "chapter\n", "lex\n", "verse\n", "half_verse\n", "sentence\n", "sentence_atom\n", "clause\n", "clause_atom\n", "phrase\n", "phrase_atom\n", "subphrase\n", "word\n", "book\n", "\tbook, chapter, verse\n", "book@am\n", "\tbook\n", "book@ar\n", "\tbook\n", "book@bn\n", "\tbook\n", "book@da\n", "\tbook\n", "book@de\n", "\tbook\n", "book@el\n", "\tbook\n", "book@en\n", "\tbook\n", "book@es\n", "\tbook\n", "book@fa\n", "\tbook\n", "book@fr\n", "\tbook\n", "book@he\n", "\tbook\n", "book@hi\n", "\tbook\n", "book@id\n", "\tbook\n", "book@ja\n", "\tbook\n", "book@ko\n", "\tbook\n", "book@la\n", "\tbook\n", "book@nl\n", "\tbook\n", "book@pa\n", "\tbook\n", "book@pt\n", "\tbook\n", "book@ru\n", "\tbook\n", "book@sw\n", "\tbook\n", "book@syc\n", "\tbook\n", "book@tr\n", "\tbook\n", "book@ur\n", "\tbook\n", "book@yo\n", "\tbook\n", "book@zh\n", "\tbook\n", "chapter\n", "\tchapter, verse\n", "code\n", 
"\tclause_atom\n", "det\n", "\tphrase, phrase_atom\n", "domain\n", "\tclause\n", "freq_lex\n", "\tlex, word\n", "function\n", "\tphrase\n", "g_cons\n", "\tword\n", "g_cons_utf8\n", "\tword\n", "g_lex\n", "\tword\n", "g_lex_utf8\n", "\tword\n", "g_word\n", "\tword\n", "g_word_utf8\n", "\tword\n", "gloss\n", "\tlex, word\n", "gn\n", "\tword\n", "label\n", "\tverse, half_verse\n", "language\n", "\tlex, word\n", "lex\n", "\tlex, word\n", "lex_utf8\n", "\tlex, word\n", "ls\n", "\tlex, word\n", "nametype\n", "\tlex, word\n", "nme\n", "\tword\n", "nu\n", "\tword\n", "number\n", "\tsentence, sentence_atom, clause, clause_atom, phrase, phrase_atom, word\n", "pargr\n", "\tclause_atom\n", "pdp\n", "\tword\n", "pfm\n", "\tword\n", "phono\n", "\tword\n", "phono_trailer\n", "\tword\n", "prs\n", "\tword\n", "prs_gn\n", "\tword\n", "prs_nu\n", "\tword\n", "prs_ps\n", "\tword\n", "ps\n", "\tword\n", "qere\n", "\tword\n", "qere_trailer\n", "\tword\n", "qere_trailer_utf8\n", "\tword\n", "qere_utf8\n", "\tword\n", "rank_lex\n", "\tlex, word\n", "rela\n", "\tclause, phrase, phrase_atom, subphrase\n", "sp\n", "\tlex, word\n", "st\n", "\tword\n", "tab\n", "\tclause_atom\n", "trailer\n", "\tword\n", "trailer_utf8\n", "\tword\n", "txt\n", "\tclause\n", "typ\n", "\tclause, clause_atom, phrase, phrase_atom\n", "uvf\n", "\tword\n", "vbe\n", "\tword\n", "vbs\n", "\tword\n", "verse\n", "\tverse\n", "voc_lex\n", "\tlex, word\n", "voc_lex_utf8\n", "\tlex, word\n", "vs\n", "\tword\n", "vt\n", "\tword\n", " 2.24s done\n" ] } ], "source": [ "A.indent(reset=True)\n", "\n", "data3 = collections.defaultdict(list)\n", "nodes_type = collections.defaultdict(list)\n", "\n", "allProps = [p for p in Fall() if p != \"otype\"]\n", "for t in F.otype.all:\n", " print(t)\n", " nodes_type[t] = F.otype.s(t)\n", "\n", "for prop in allProps:\n", " nodes_with_prop = set([i[0] for i in Fs(prop).items()])\n", " for t in F.otype.all:\n", " if len(nodes_with_prop.intersection(nodes_type[t]))>0:\n", " 
data3[prop].append(t)\n", " \n", "for prop in allProps:\n", " print(prop)\n", " print(f\"\\t{', '.join(data3[prop])}\")\n", "\n", "A.info(\"done\")" ] }, { "cell_type": "code", "execution_count": 10, "id": "13f38e94-31f3-43aa-bce5-e3e419412275", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data3 == data1" ] }, { "cell_type": "markdown", "id": "12839aa4-5a00-4586-bed5-b72cfb9ed96a", "metadata": {}, "source": [ "Marvellous!\n", "\n", "Now I make that code a tad more pythonic." ] }, { "cell_type": "code", "execution_count": 15, "id": "85a534f7-025c-40ce-8543-19f554c80f01", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "book\n", "chapter\n", "lex\n", "verse\n", "half_verse\n", "sentence\n", "sentence_atom\n", "clause\n", "clause_atom\n", "phrase\n", "phrase_atom\n", "subphrase\n", "word\n", "book\n", "\tbook, chapter, verse\n", "book@am\n", "\tbook\n", "book@ar\n", "\tbook\n", "book@bn\n", "\tbook\n", "book@da\n", "\tbook\n", "book@de\n", "\tbook\n", "book@el\n", "\tbook\n", "book@en\n", "\tbook\n", "book@es\n", "\tbook\n", "book@fa\n", "\tbook\n", "book@fr\n", "\tbook\n", "book@he\n", "\tbook\n", "book@hi\n", "\tbook\n", "book@id\n", "\tbook\n", "book@ja\n", "\tbook\n", "book@ko\n", "\tbook\n", "book@la\n", "\tbook\n", "book@nl\n", "\tbook\n", "book@pa\n", "\tbook\n", "book@pt\n", "\tbook\n", "book@ru\n", "\tbook\n", "book@sw\n", "\tbook\n", "book@syc\n", "\tbook\n", "book@tr\n", "\tbook\n", "book@ur\n", "\tbook\n", "book@yo\n", "\tbook\n", "book@zh\n", "\tbook\n", "chapter\n", "\tchapter, verse\n", "code\n", "\tclause_atom\n", "det\n", "\tphrase, phrase_atom\n", "domain\n", "\tclause\n", "freq_lex\n", "\tlex, word\n", "function\n", "\tphrase\n", "g_cons\n", "\tword\n", "g_cons_utf8\n", "\tword\n", "g_lex\n", "\tword\n", "g_lex_utf8\n", "\tword\n", "g_word\n", "\tword\n", "g_word_utf8\n", "\tword\n", "gloss\n", "\tlex, 
word\n", "gn\n", "\tword\n", "label\n", "\tverse, half_verse\n", "language\n", "\tlex, word\n", "lex\n", "\tlex, word\n", "lex_utf8\n", "\tlex, word\n", "ls\n", "\tlex, word\n", "nametype\n", "\tlex, word\n", "nme\n", "\tword\n", "nu\n", "\tword\n", "number\n", "\tsentence, sentence_atom, clause, clause_atom, phrase, phrase_atom, word\n", "pargr\n", "\tclause_atom\n", "pdp\n", "\tword\n", "pfm\n", "\tword\n", "phono\n", "\tword\n", "phono_trailer\n", "\tword\n", "prs\n", "\tword\n", "prs_gn\n", "\tword\n", "prs_nu\n", "\tword\n", "prs_ps\n", "\tword\n", "ps\n", "\tword\n", "qere\n", "\tword\n", "qere_trailer\n", "\tword\n", "qere_trailer_utf8\n", "\tword\n", "qere_utf8\n", "\tword\n", "rank_lex\n", "\tlex, word\n", "rela\n", "\tclause, phrase, phrase_atom, subphrase\n", "sp\n", "\tlex, word\n", "st\n", "\tword\n", "tab\n", "\tclause_atom\n", "trailer\n", "\tword\n", "trailer_utf8\n", "\tword\n", "txt\n", "\tclause\n", "typ\n", "\tclause, clause_atom, phrase, phrase_atom\n", "uvf\n", "\tword\n", "vbe\n", "\tword\n", "vbs\n", "\tword\n", "verse\n", "\tverse\n", "voc_lex\n", "\tlex, word\n", "voc_lex_utf8\n", "\tlex, word\n", "vs\n", "\tword\n", "vt\n", "\tword\n", " 1.73s done\n" ] } ], "source": [ "A.indent(reset=True)\n", "\n", "data4 = collections.defaultdict(list)\n", "nodes_type = collections.defaultdict(list)\n", "\n", "allProps = [p for p in Fall() if p != \"otype\"]\n", "allTypes = F.otype.all\n", "\n", "for t in allTypes:\n", " print(t)\n", " nodes_type[t] = set(F.otype.s(t))\n", "\n", "for prop in allProps:\n", " nodes_with_prop = {i[0] for i in Fs(prop).items()}\n", " for t in allTypes:\n", " if len(nodes_with_prop & nodes_type[t]) > 0:\n", " data4[prop].append(t)\n", "\n", "for prop in allProps:\n", " print(prop)\n", " print(f\"\\t{', '.join(data4[prop])}\")\n", "\n", "A.info(\"done\")" ] }, { "cell_type": "code", "execution_count": 14, "id": "163a8319-0fe0-4eb2-aa2c-b0497b44c61a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, 
"execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data4 == data3" ] }, { "cell_type": "markdown", "id": "448d3486-b6df-4233-a49f-19fdd7ff4b67", "metadata": {}, "source": [ "This takes 25% off the execution time.\n", "\n", "The crux of Marek's optimization is to start with the items in the feature, which for most features is smaller than the number\n", "of nodes of a given type.\n", "\n", "Knowing that, I wonder whether I can use the iterator `any` again instead of set construction?\n", "Will that improve the speed? No set has to be constructed. But set construction is very fast. Let's see." ] }, { "cell_type": "code", "execution_count": 20, "id": "8f1dc9ca-7764-40f2-b0e9-3235467ae7b8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "book\n", "chapter\n", "lex\n", "verse\n", "half_verse\n", "sentence\n", "sentence_atom\n", "clause\n", "clause_atom\n", "phrase\n", "phrase_atom\n", "subphrase\n", "word\n", "book\n", "\tbook, chapter, verse\n", "book@am\n", "\tbook\n", "book@ar\n", "\tbook\n", "book@bn\n", "\tbook\n", "book@da\n", "\tbook\n", "book@de\n", "\tbook\n", "book@el\n", "\tbook\n", "book@en\n", "\tbook\n", "book@es\n", "\tbook\n", "book@fa\n", "\tbook\n", "book@fr\n", "\tbook\n", "book@he\n", "\tbook\n", "book@hi\n", "\tbook\n", "book@id\n", "\tbook\n", "book@ja\n", "\tbook\n", "book@ko\n", "\tbook\n", "book@la\n", "\tbook\n", "book@nl\n", "\tbook\n", "book@pa\n", "\tbook\n", "book@pt\n", "\tbook\n", "book@ru\n", "\tbook\n", "book@sw\n", "\tbook\n", "book@syc\n", "\tbook\n", "book@tr\n", "\tbook\n", "book@ur\n", "\tbook\n", "book@yo\n", "\tbook\n", "book@zh\n", "\tbook\n", "chapter\n", "\tchapter, verse\n", "code\n", "\tclause_atom\n", "det\n", "\tphrase, phrase_atom\n", "domain\n", "\tclause\n", "freq_lex\n", "\tlex, word\n", "function\n", "\tphrase\n", "g_cons\n", "\tword\n", "g_cons_utf8\n", "\tword\n", "g_lex\n", "\tword\n", "g_lex_utf8\n", "\tword\n", "g_word\n", "\tword\n", 
"g_word_utf8\n", "\tword\n", "gloss\n", "\tlex, word\n", "gn\n", "\tword\n", "label\n", "\tverse, half_verse\n", "language\n", "\tlex, word\n", "lex\n", "\tlex, word\n", "lex_utf8\n", "\tlex, word\n", "ls\n", "\tlex, word\n", "nametype\n", "\tlex, word\n", "nme\n", "\tword\n", "nu\n", "\tword\n", "number\n", "\tsentence, sentence_atom, clause, clause_atom, phrase, phrase_atom, word\n", "pargr\n", "\tclause_atom\n", "pdp\n", "\tword\n", "pfm\n", "\tword\n", "phono\n", "\tword\n", "phono_trailer\n", "\tword\n", "prs\n", "\tword\n", "prs_gn\n", "\tword\n", "prs_nu\n", "\tword\n", "prs_ps\n", "\tword\n", "ps\n", "\tword\n", "qere\n", "\tword\n", "qere_trailer\n", "\tword\n", "qere_trailer_utf8\n", "\tword\n", "qere_utf8\n", "\tword\n", "rank_lex\n", "\tlex, word\n", "rela\n", "\tclause, phrase, phrase_atom, subphrase\n", "sp\n", "\tlex, word\n", "st\n", "\tword\n", "tab\n", "\tclause_atom\n", "trailer\n", "\tword\n", "trailer_utf8\n", "\tword\n", "txt\n", "\tclause\n", "typ\n", "\tclause, clause_atom, phrase, phrase_atom\n", "uvf\n", "\tword\n", "vbe\n", "\tword\n", "vbs\n", "\tword\n", "verse\n", "\tverse\n", "voc_lex\n", "\tlex, word\n", "voc_lex_utf8\n", "\tlex, word\n", "vs\n", "\tword\n", "vt\n", "\tword\n", " 8.12s done\n" ] } ], "source": [ "A.indent(reset=True)\n", "\n", "data5 = collections.defaultdict(list)\n", "nodes_type = collections.defaultdict(list)\n", "\n", "allProps = [p for p in Fall() if p != \"otype\"]\n", "allTypes = F.otype.all\n", "\n", "for t in allTypes:\n", " print(t)\n", " nodes_type[t] = set(F.otype.s(t))\n", "\n", "for prop in allProps:\n", " nodes_with_prop = [i[0] for i in Fs(prop).items()]\n", " for t in allTypes:\n", " these_nodes_type = nodes_type[t]\n", " if any(n in these_nodes_type for n in nodes_with_prop):\n", " data5[prop].append(t)\n", "\n", "for prop in allProps:\n", " print(prop)\n", " print(f\"\\t{', '.join(data5[prop])}\")\n", "\n", "A.info(\"done\")" ] }, { "cell_type": "code", "execution_count": 21, "id": 
"2164866d-6a0b-4eec-abb3-32ad26488759", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data5 == data4" ] }, { "cell_type": "markdown", "id": "efdfb1c7-ad7e-4a8e-8c85-87a46c969e70", "metadata": {}, "source": [ "Important lesson: the overhead of the set construction is far less than using `any`.\n", "\n", "In the set intersection there is no loop with python code involved, so it proceeds with C-speed.\n", "\n", "In the `any` iterator there is a Python expression (`n in these_nodes_type`) and that will be executed at Python speed.\n", "There we loose the time.\n", "\n", "So the second crux of Marek's method is to use set intersection instead of an iterator." ] }, { "cell_type": "markdown", "id": "e0acd1cf-26a4-45ba-b13c-4da7850c89b8", "metadata": {}, "source": [ "# New function `showFeatureTypes()`" ] }, { "cell_type": "code", "execution_count": 40, "id": "291da7ca-de16-438e-a9bc-1a85cbdb807f", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "feature | node types\n", ":- | :-\n", "**book** | *book*, *chapter*, *verse*\n", "**book@am** | *book*\n", "**book@ar** | *book*\n", "**book@bn** | *book*\n", "**book@da** | *book*\n", "**book@de** | *book*\n", "**book@el** | *book*\n", "**book@en** | *book*\n", "**book@es** | *book*\n", "**book@fa** | *book*\n", "**book@fr** | *book*\n", "**book@he** | *book*\n", "**book@hi** | *book*\n", "**book@id** | *book*\n", "**book@ja** | *book*\n", "**book@ko** | *book*\n", "**book@la** | *book*\n", "**book@nl** | *book*\n", "**book@pa** | *book*\n", "**book@pt** | *book*\n", "**book@ru** | *book*\n", "**book@sw** | *book*\n", "**book@syc** | *book*\n", "**book@tr** | *book*\n", "**book@ur** | *book*\n", "**book@yo** | *book*\n", "**book@zh** | *book*\n", "**chapter** | *chapter*, *verse*\n", "**code** | *clause_atom*\n", "**det** | *phrase*, *phrase_atom*\n", "**domain** | *clause*\n", "**freq_lex** | *lex*, 
*word*\n", "**function** | *phrase*\n", "**g_cons** | *word*\n", "**g_cons_utf8** | *word*\n", "**g_lex** | *word*\n", "**g_lex_utf8** | *word*\n", "**g_word** | *word*\n", "**g_word_utf8** | *word*\n", "**gloss** | *lex*, *word*\n", "**gn** | *word*\n", "**label** | *verse*, *half_verse*\n", "**language** | *lex*, *word*\n", "**lex** | *lex*, *word*\n", "**lex_utf8** | *lex*, *word*\n", "**ls** | *lex*, *word*\n", "**nametype** | *lex*, *word*\n", "**nme** | *word*\n", "**nu** | *word*\n", "**number** | *sentence*, *sentence_atom*, *clause*, *clause_atom*, *phrase*, *phrase_atom*, *word*\n", "**pargr** | *clause_atom*\n", "**pdp** | *word*\n", "**pfm** | *word*\n", "**phono** | *word*\n", "**phono_trailer** | *word*\n", "**prs** | *word*\n", "**prs_gn** | *word*\n", "**prs_nu** | *word*\n", "**prs_ps** | *word*\n", "**ps** | *word*\n", "**qere** | *word*\n", "**qere_trailer** | *word*\n", "**qere_trailer_utf8** | *word*\n", "**qere_utf8** | *word*\n", "**rank_lex** | *lex*, *word*\n", "**rela** | *clause*, *phrase*, *phrase_atom*, *subphrase*\n", "**sp** | *lex*, *word*\n", "**st** | *word*\n", "**tab** | *clause_atom*\n", "**trailer** | *word*\n", "**trailer_utf8** | *word*\n", "**txt** | *clause*\n", "**typ** | *clause*, *clause_atom*, *phrase*, *phrase_atom*\n", "**uvf** | *word*\n", "**vbe** | *word*\n", "**vbs** | *word*\n", "**verse** | *verse*\n", "**voc_lex** | *lex*, *word*\n", "**voc_lex_utf8** | *lex*, *word*\n", "**vs** | *word*\n", "**vt** | *word*" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/markdown": [ "To get this overview in a dict, call `A.featureTypes(show=False)`\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "A.featureTypes()" ] }, { "cell_type": "code", "execution_count": 37, "id": "96458d46-6bc8-40e8-b978-325846eb2a2e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'book': ['book', 'chapter', 'verse'],\n", " 'book@am': ['book'],\n", 
" 'book@ar': ['book'],\n", " 'book@bn': ['book'],\n", " 'book@da': ['book'],\n", " 'book@de': ['book'],\n", " 'book@el': ['book'],\n", " 'book@en': ['book'],\n", " 'book@es': ['book'],\n", " 'book@fa': ['book'],\n", " 'book@fr': ['book'],\n", " 'book@he': ['book'],\n", " 'book@hi': ['book'],\n", " 'book@id': ['book'],\n", " 'book@ja': ['book'],\n", " 'book@ko': ['book'],\n", " 'book@la': ['book'],\n", " 'book@nl': ['book'],\n", " 'book@pa': ['book'],\n", " 'book@pt': ['book'],\n", " 'book@ru': ['book'],\n", " 'book@sw': ['book'],\n", " 'book@syc': ['book'],\n", " 'book@tr': ['book'],\n", " 'book@ur': ['book'],\n", " 'book@yo': ['book'],\n", " 'book@zh': ['book'],\n", " 'chapter': ['chapter', 'verse'],\n", " 'code': ['clause_atom'],\n", " 'det': ['phrase', 'phrase_atom'],\n", " 'domain': ['clause'],\n", " 'freq_lex': ['lex', 'word'],\n", " 'function': ['phrase'],\n", " 'g_cons': ['word'],\n", " 'g_cons_utf8': ['word'],\n", " 'g_lex': ['word'],\n", " 'g_lex_utf8': ['word'],\n", " 'g_word': ['word'],\n", " 'g_word_utf8': ['word'],\n", " 'gloss': ['lex', 'word'],\n", " 'gn': ['word'],\n", " 'label': ['verse', 'half_verse'],\n", " 'language': ['lex', 'word'],\n", " 'lex': ['lex', 'word'],\n", " 'lex_utf8': ['lex', 'word'],\n", " 'ls': ['lex', 'word'],\n", " 'nametype': ['lex', 'word'],\n", " 'nme': ['word'],\n", " 'nu': ['word'],\n", " 'number': ['sentence',\n", " 'sentence_atom',\n", " 'clause',\n", " 'clause_atom',\n", " 'phrase',\n", " 'phrase_atom',\n", " 'word'],\n", " 'pargr': ['clause_atom'],\n", " 'pdp': ['word'],\n", " 'pfm': ['word'],\n", " 'phono': ['word'],\n", " 'phono_trailer': ['word'],\n", " 'prs': ['word'],\n", " 'prs_gn': ['word'],\n", " 'prs_nu': ['word'],\n", " 'prs_ps': ['word'],\n", " 'ps': ['word'],\n", " 'qere': ['word'],\n", " 'qere_trailer': ['word'],\n", " 'qere_trailer_utf8': ['word'],\n", " 'qere_utf8': ['word'],\n", " 'rank_lex': ['lex', 'word'],\n", " 'rela': ['clause', 'phrase', 'phrase_atom', 'subphrase'],\n", " 'sp': ['lex', 
'word'],\n", " 'st': ['word'],\n", " 'tab': ['clause_atom'],\n", " 'trailer': ['word'],\n", " 'trailer_utf8': ['word'],\n", " 'txt': ['clause'],\n", " 'typ': ['clause', 'clause_atom', 'phrase', 'phrase_atom'],\n", " 'uvf': ['word'],\n", " 'vbe': ['word'],\n", " 'vbs': ['word'],\n", " 'verse': ['verse'],\n", " 'voc_lex': ['lex', 'word'],\n", " 'voc_lex_utf8': ['lex', 'word'],\n", " 'vs': ['word'],\n", " 'vt': ['word']}" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "A.featureTypes(show=False)" ] },
{ "cell_type": "markdown", "id": "90771d13-e78c-4268-a07d-b8f2f64a32ea", "metadata": {}, "source": [ "# Lexemes\n", "\n", "Different lexemes can have the same value for feature `lex`.\n", "\n", "What is happening?" ] },
{ "cell_type": "code", "execution_count": 15, "id": "ac2fee6a-14c5-4e16-81a4-fac72c4c91ea", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "lx=1437743 lex='NHR/' lan='Hebrew'\n", "lx=1442740 lex='NHR[' lan='Hebrew'\n", "lx=1443420 lex='NHR=[' lan='Hebrew'\n", "lx=1444637 lex='NHRH/' lan='Hebrew'\n", "lx=1445749 lex='NHR/' lan='Aramaic'\n" ] } ], "source": [
 "# Print node, `lex` value and language of every lexeme node whose `lex` starts with NHR.\n",
 "for lx in F.otype.s(\"lex\"):\n",
 "    lex = F.lex.v(lx)\n",
 "    lan = F.language.v(lx)\n",
 "\n",
 "    if lex.startswith(\"NHR\"):\n",
 "        print(f\"{lx=} {lex=} {lan=}\")" ] },
{ "cell_type": "markdown", "id": "e897f69e-cf22-4e37-abcf-34c8baf1e26c", "metadata": {}, "source": [ "The lexemes that share a `lex` value belong to different languages: `NHR/` occurs once as a Hebrew lexeme and once as an Aramaic lexeme." 
] }, { "cell_type": "markdown", "id": "bc4905b3-1c5e-4d62-9235-b320f14acc86", "metadata": {}, "source": [ "# `freq` and `rank` features" ] },
{ "cell_type": "code", "execution_count": 27, "id": "e16bcfb9-3f78-4d51-9c0a-f597fcd1ed6e", "metadata": {}, "outputs": [], "source": [ "from collections import Counter\n", "import pandas as pd" ] },
{ "cell_type": "code", "execution_count": 29, "id": "e6bb1d92-d4bc-48ba-9172-c558e9bd9864", "metadata": {}, "outputs": [], "source": [
 "# Count, per language, how often each `lex` value occurs among the word nodes.\n",
 "# A single pass over the words fills the counters of all languages at once.\n",
 "lex_count = {lang: Counter() for lang in (\"Hebrew\", \"Aramaic\")}\n",
 "\n",
 "for w in F.otype.s(\"word\"):\n",
 "    lang = F.language.v(w)\n",
 "    # words in any other language are ignored\n",
 "    if lang in lex_count:\n",
 "        lex_count[lang][F.lex.v(w)] += 1\n",
 "\n",
 "# One record per lexeme node: the `freq_lex` and `rank_lex` feature values\n",
 "# next to the per-language count computed above (`freq_cnt`).\n",
 "data = []\n",
 "\n",
 "for i in F.otype.s(\"lex\"):\n",
 "    lex = F.lex.v(i)\n",
 "    lang = F.language.v(i)\n",
 "    data.append(\n",
 "        {\n",
 "            \"id\": i,\n",
 "            \"lex\": lex,\n",
 "            \"lang\": lang,\n",
 "            \"freq_lex\": F.freq_lex.v(i),\n",
 "            \"rank_lex\": F.rank_lex.v(i),\n",
 "            \"freq_cnt\": lex_count[lang][lex],\n",
 "        }\n",
 "    )" ] },
{ "cell_type": "code", "execution_count": 30, "id": "e7ca00d4-83a1-4d5d-80c9-e6f6f8ca237b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
lexlangfreq_lexrank_lexfreq_cnt
id
1443538WAramaic7310731
1437609WHebrew50272050272
1443534LAramaic3781378
1437607HHebrew30386130386
1437629LHebrew20069220069
..................
1444624>LP=[Hebrew157131
1444625JWY>T/Hebrew157131
1441980>MH====/Hebrew157131
1444616MDXPH/Hebrew157131
1446831JCC/Hebrew157131
\n", "

9230 rows × 5 columns

\n", "
" ], "text/plain": [ " lex lang freq_lex rank_lex freq_cnt\n", "id \n", "1443538 W Aramaic 731 0 731\n", "1437609 W Hebrew 50272 0 50272\n", "1443534 L Aramaic 378 1 378\n", "1437607 H Hebrew 30386 1 30386\n", "1437629 L Hebrew 20069 2 20069\n", "... ... ... ... ... ...\n", "1444624 >LP=[ Hebrew 1 5713 1\n", "1444625 JWY>T/ Hebrew 1 5713 1\n", "1441980 >MH====/ Hebrew 1 5713 1\n", "1444616 MDXPH/ Hebrew 1 5713 1\n", "1446831 JCC/ Hebrew 1 5713 1\n", "\n", "[9230 rows x 5 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(data)\n", "df = df.set_index([\"id\"])\n", "df.sort_values(\"rank_lex\")" ] }, { "cell_type": "code", "execution_count": 14, "id": "9ff4d0a3-8bd8-4ae7-97c2-6cca7adddc23", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "W 51003\n", "H 30392\n", "L 20447\n", "B 15768\n", ">T 10987\n", "MN 7681\n", "JHWH/ 6828\n", "L 5521\n", ">CR 5500\n" ] } ], "source": [ "for (v, f) in F.lex.freqList(nodeTypes={\"word\"})[0:10]:\n", " print(f\"{v} {f}\")" ] }, { "cell_type": "code", "execution_count": 20, "id": "11a74379-294b-4422-a960-c5f2352b2071", "metadata": {}, "outputs": [], "source": [ "wa = 1443538\n", "wh = 1437609" ] }, { "cell_type": "code", "execution_count": 21, "id": "8b9fe73a-15c2-4c84-823c-3fce86534873", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "W Aramaic\n", "W Hebrew\n" ] } ], "source": [ "for lx in (wa, wh):\n", " print(f\"{F.lex.v(lx)} {F.language.v(lx)}\")" ] }, { "cell_type": "code", "execution_count": 22, "id": "a1cf61b2-70c5-4aa0-b467-4e9749d9e741", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "731" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "F.freq_lex.v(wa)" ] }, { "cell_type": "code", "execution_count": 23, "id": "a5d0af77-9d22-4b59-9308-08964208a959", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "50272" ] }, 
"execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "F.freq_lex.v(wh)" ] }, { "cell_type": "code", "execution_count": 24, "id": "339fdf0f-819d-40da-8969-aa54dcb3b6d6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "F.rank_lex.v(wa)" ] }, { "cell_type": "code", "execution_count": 25, "id": "06459c15-efd9-4772-be50-9930912f27b4", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "F.rank_lex.v(wh)" ] }, { "cell_type": "code", "execution_count": null, "id": "ad38fd8c-31fb-43f7-b104-7c93be2e6981", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }