{ "cells": [ { "cell_type": "markdown", "metadata": { "toc": true }, "source": [ "

Table of Contents

\n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Converter file" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Latest and Greatest" ] }, { "cell_type": "code", "execution_count": 184, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Processing Version 1935\n", " 0.00s Importing data from walking through the source ...\n", " | 0.00s Preparing metadata... \n", " | 0.00s No structure nodes will be set up\n", " | SECTION TYPES: book, chapter, verse\n", " | SECTION FEATURES: book, chapter, verse\n", " | STRUCTURE TYPES: \n", " | STRUCTURE FEATURES: \n", " | TEXT FEATURES:\n", " | | text-orig-full word\n", " | 0.00s OK\n", " | 0.00s Following director... \n", "\thandling input\\OTt4.txt...\n", " | 11s \"edge\" actions: 0\n", " | 11s \"feature\" actions: 685735\n", " | 11s \"node\" actions: 62042\n", " | 11s \"resume\" actions: 0\n", " | 11s \"slot\" actions: 623693\n", " | 11s \"terminate\" actions: 685735\n", " | 57 x \"book\" node \n", " | 1193 x \"chapter\" node \n", " | 30420 x \"subverse\" node \n", " | 30372 x \"verse\" node \n", " | 623693 x \"word\" node = slot type\n", " | 685735 nodes of all types\n", " | 11s OK\n", " | 0.03s Removing unlinked nodes ... \n", " | | 0.00s 1 unlinked \"chapter\" node: [1]\n", " | | 0.00s 1 unlinked \"verse\" node: [1]\n", " | | 0.00s 1 unlinked \"subverse\" node: [1]\n", " | | 0.00s 3 unlinked nodes\n", " | | 0.00s Leaving 685732 nodes\n", " | 0.00s checking for nodes and edges ... \n", " | 0.00s OK\n", " | 0.00s checking features ... 
\n", " | 0.48s OK\n", " | 0.00s reordering nodes ...\n", " | 0.18s Sorting 57 nodes of type \"book\"\n", " | 0.24s Sorting 1192 nodes of type \"chapter\"\n", " | 0.31s Sorting 30419 nodes of type \"subverse\"\n", " | 0.42s Sorting 30371 nodes of type \"verse\"\n", " | 0.53s Max node = 685732\n", " | 0.54s OK\n", " | 0.00s reassigning feature values ...\n", " | | 1.14s node feature \"BOL_gloss\" with 623693 nodes\n", " | | 1.36s node feature \"BOL_lexeme_dict\" with 623693 nodes\n", " | | 1.58s node feature \"abc_order\" with 623693 nodes\n", " | | 1.79s node feature \"book\" with 623750 nodes\n", " | | 2.00s node feature \"case\" with 623693 nodes\n", " | | 2.21s node feature \"chapter\" with 624885 nodes\n", " | | 2.43s node feature \"degree\" with 623693 nodes\n", " | | 2.65s node feature \"freq_lemma\" with 623693 nodes\n", " | | 2.89s node feature \"g_cons_utf8\" with 623693 nodes\n", " | | 3.11s node feature \"gn\" with 623693 nodes\n", " | | 3.36s node feature \"lemma_gloss\" with 623693 nodes\n", " | | 3.58s node feature \"lemma_translit\" with 623693 nodes\n", " | | 3.83s node feature \"lex_utf8\" with 623693 nodes\n", " | | 4.07s node feature \"mood\" with 623693 nodes\n", " | | 4.32s node feature \"morphology\" with 623693 nodes\n", " | | 4.53s node feature \"nu\" with 623693 nodes\n", " | | 4.76s node feature \"orig_order\" with 623693 nodes\n", " | | 4.98s node feature \"ps\" with 623693 nodes\n", " | | 5.21s node feature \"sp\" with 623693 nodes\n", " | | 5.41s node feature \"strong\" with 623693 nodes\n", " | | 5.66s node feature \"subverse\" with 654112 nodes\n", " | | 5.91s node feature \"tense\" with 623693 nodes\n", " | | 6.13s node feature \"translit_SBL\" with 623693 nodes\n", " | | 6.36s node feature \"verse\" with 654064 nodes\n", " | | 6.59s node feature \"voice\" with 623693 nodes\n", " | | 6.83s node feature \"word\" with 623693 nodes\n", " | 6.05s OK\n", " 0.00s Exporting 27 node and 1 edge and 1 config features to output:\n", " 0.00s 
VALIDATING oslots feature\n", " 0.09s VALIDATING oslots feature\n", " 0.09s maxSlot= 623693\n", " 0.09s maxNode= 685732\n", " 0.10s OK: oslots is valid\n", " | 1.12s T BOL_gloss to output\n", " | 1.21s T BOL_lexeme_dict to output\n", " | 1.13s T abc_order to output\n", " | 1.14s T book to output\n", " | 1.00s T case to output\n", " | 1.00s T chapter to output\n", " | 1.01s T degree to output\n", " | 1.12s T freq_lemma to output\n", " | 1.14s T g_cons_utf8 to output\n", " | 1.06s T gn to output\n", " | 1.15s T lemma_gloss to output\n", " | 1.02s T lemma_translit to output\n", " | 1.14s T lex_utf8 to output\n", " | 0.98s T mood to output\n", " | 1.07s T morphology to output\n", " | 1.34s T nu to output\n", " | 1.66s T orig_order to output\n", " | 0.32s T otype to output\n", " | 1.20s T ps to output\n", " | 1.28s T sp to output\n", " | 1.17s T strong to output\n", " | 1.08s T subverse to output\n", " | 1.13s T tense to output\n", " | 1.21s T translit_SBL to output\n", " | 1.34s T verse to output\n", " | 1.02s T voice to output\n", " | 1.32s T word to output\n", " | 0.52s T oslots to output\n", " | 0.00s M otext to output\n", " 31s Exported 27 node features and 1 edge features and 1 config features to output\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 184, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os\n", "import re\n", "import collections\n", "import json\n", "import csv\n", "# from glob import glob\n", "from tf.fabric import Fabric\n", "from tf.convert.walker import CV\n", "# from tf.compose import modify\n", "\n", "source_dirs = 'input' # \"input\" is the name of the input folder that contains the source file\n", "output_dirs = 'output' # \"output\" is the name of the output folder to which the finished TF files will be dumped into\n", "\n", "bo2book = {line.split()[0]:line.split()[1] for line in '''\n", "OTt4 Old_Testament\n", "'''.split('\\n') if line} # \"OT\" is the name of the file in the input folder AND 
\"split()\" splits at space\n", "\n", "# patts = {'section': re.compile('(\\d*):(\\d*)\\.(\\d*)')}\n", "\n", "def director(cv):\n", " \n", " '''\n", " Walks through LXX and triggers\n", " slot and node creation events.\n", " '''\n", " \n", " # process books in order\n", " for bo, book in bo2book.items():\n", " \n", " book_loc = os.path.join(source_dirs, f'{bo}.txt')\n", " \n", " print(f'\\thandling {book_loc}...')\n", " \n", " with open(book_loc, 'r', encoding=\"utf8\") as infile:\n", " text = [w for w in infile.read().split('\\n') if w]\n", " \n", " this_book = cv.node('book')\n", " \n", " # keep track of when to trigger paragraph, chapter, and verse objects\n", " # para_track = 1 # keep counts of paragraphs\n", " prev_book = \"Gen\" # start at Genesis\n", " prev_chap = 1 # start at 1\n", " prev_verse = 1 # start at 1\n", " prev_subverse = ''\n", " wrdnum = 0 # start at 0\n", " this_chap = cv.node('chapter')\n", " # this_para = cv.node('paragraph')\n", " this_verse = cv.node('verse')\n", " this_subverse = cv.node('subverse')\n", " \n", " # iterate through words and construct objects\n", " for word in text:\n", "\n", " wrdnum += 1\n", "\n", " data = word.split('\\t')\n", " # word_data, lemmas = data[:7], data[7:]\n", "\n", " word_data = data[:26] #the number here is the amount of columns\n", " morphology = ' '.join(data[26:]) #the number here is the amount of columns\n", " \n", " # segment out word data\n", " # bo_code, ref, brake, ketiv, qere, morph, strongs = word_data\n", " orig_order, book, chapter, verse, subverse, word, lex_utf8, g_cons_utf8, translit_SBL, lemma_gloss, strong, sp, morphology, case, nu, gn, degree, tense, voice, mood, ps, lemma_translit, abc_order, freq_lemma, BOL_lexeme_dict, BOL_gloss = word_data\n", "\n", " # if chapter == \"Prolog\":\n", " # chapter = 0\n", "\n", " subverse == \"\"\n", "\n", "\n", " #try:\n", " # verse = int(verse)\n", " #except ValueError:\n", " # subverse = verse[-1:]\n", " # verse = verse[:-1]\n", "\n", " if verse == 
\"\":\n", " print(f'{orig_order}: {verse} {subverse}')\n", "\n", "\n", " # strongs_lemma, anlex_lemma = ' '.join(lemmas).split('!') # reconstitute lemmas and split on !\n", "\n", " # chapt, verse, wrdnum = [int(v) for v in patts['section'].match(ref).groups()]\n", "\n", " # -- handle TF events --\n", "\n", " # detect book boundary\n", " if prev_book != book:\n", "\n", " # end subverse\n", " cv.feature(this_subverse, subverse=prev_subverse)\n", " cv.terminate(this_subverse)\n", "\n", " # end verse\n", " cv.feature(this_verse, verse=prev_verse)\n", " cv.terminate(this_verse)\n", " \n", " # end chapter\n", " cv.feature(this_chap, chapter=prev_chap)\n", " cv.terminate(this_chap)\n", "\n", " # end book\n", " cv.feature(this_book, book=prev_book)\n", " cv.terminate(this_book)\n", " \n", " # new book, chapter, verse, and subverse begin\n", " this_book = cv.node('book')\n", " prev_book = book\n", " this_chap = cv.node('chapter')\n", " prev_chap = chapter\n", " this_verse = cv.node('verse')\n", " prev_verse = verse\n", " this_subverse = cv.node('subverse')\n", " prev_subverse = subverse\n", " wrdnum = 1\n", " \n", " # detect chapter boundary\n", " elif prev_chap != chapter:\n", "\n", " # end subverse\n", " cv.feature(this_subverse, subverse=prev_subverse)\n", " cv.terminate(this_subverse)\n", " \n", " # end verse\n", " cv.feature(this_verse, verse=prev_verse)\n", " cv.terminate(this_verse)\n", " \n", " # end chapter\n", " cv.feature(this_chap, chapter=prev_chap)\n", " cv.terminate(this_chap)\n", " \n", " # new chapter, verse, and subverse begin\n", " this_chap = cv.node('chapter')\n", " prev_chap = chapter\n", " this_verse = cv.node('verse')\n", " prev_verse = verse\n", " this_subverse = cv.node('subverse')\n", " prev_subverse = subverse\n", " wrdnum = 1\n", " \n", " # detect verse boundary\n", " elif prev_verse != verse:\n", "\n", " # end subverse\n", " cv.feature(this_subverse, subverse=prev_subverse)\n", " cv.terminate(this_subverse)\n", "\n", " # end verse\n", " 
cv.feature(this_verse, verse=prev_verse)\n", " cv.terminate(this_verse)\n", "\n", " # new verse and subverse begin\n", " this_verse = cv.node('verse')\n", " prev_verse = verse\n", " this_subverse = cv.node('subverse')\n", " prev_subverse = subverse\n", " wrdnum = 1\n", "\n", " # detect subverse boundary\n", " elif prev_subverse != subverse:\n", " cv.feature(this_subverse, subverse=prev_subverse)\n", " cv.terminate(this_subverse)\n", " this_subverse = cv.node('subverse')\n", " prev_subverse = subverse\n", "\n", " \n", " # detect paragraph boundary\n", " # if brake == 'P':\n", " # cv.feature(this_para, para=para_track)\n", " # cv.terminate(this_para)\n", " # this_para = cv.node('paragraph') # start a new paragraph\n", " # para_track += 1 # count paragraphs in the book\n", " \n", " \n", " # make word object\n", " this_word = cv.slot()\n", " cv.feature(this_word, \n", "\n", " orig_order=orig_order,\n", " book=book,\n", " chapter=chapter,\n", " verse=verse,\n", " subverse=subverse,\n", " word=word,\n", " lex_utf8=lex_utf8,\n", " g_cons_utf8=g_cons_utf8,\n", " translit_SBL=translit_SBL,\n", " lemma_gloss=lemma_gloss,\n", " strong=strong,\n", " sp=sp,\n", " morphology=morphology,\n", " case=case,\n", " nu=nu,\n", " gn=gn,\n", " degree=degree,\n", " tense=tense,\n", " voice=voice,\n", " mood=mood,\n", " ps=ps,\n", " lemma_translit=lemma_translit,\n", " abc_order=abc_order,\n", " freq_lemma=freq_lemma,\n", " BOL_lexeme_dict=BOL_lexeme_dict,\n", " BOL_gloss=BOL_gloss,\n", "\n", " \n", " # ketiv=ketiv, \n", " # qere=qere, \n", " # strongs=strongs, \n", " # str_lem=strongs_lemma.strip(),\n", " # anlex_lem=anlex_lemma.strip()\n", " )\n", " cv.terminate(this_word)\n", " \n", " # end book and its objects\n", " # - end subverse\n", " cv.feature(this_subverse, subverse=prev_subverse)\n", " cv.terminate(this_subverse)\n", "\n", " # - end verse\n", " cv.feature(this_verse, verse=prev_verse)\n", " cv.terminate(this_verse)\n", " \n", " # - end paragraph\n", " # cv.feature(this_para, 
para=para_track)\n", " # cv.terminate(this_para)\n", " \n", " # - end chapter\n", " cv.feature(this_chap, chapter=prev_chap)\n", " cv.terminate(this_chap)\n", " \n", " # - end book\n", " cv.feature(this_book, book=prev_book)\n", " cv.terminate(this_book)\n", "\n", "\n", "slotType = 'word'\n", "otext = {'fmt:text-orig-full':'{word} ',\n", " 'sectionTypes':'book,chapter,verse',\n", " 'sectionFeatures':'book,chapter,verse'}\n", "\n", "generic = {'Name': 'LXX',\n", " 'Version': '1935',\n", " 'Author': 'Rahlfs',\n", " 'Editors': 'CCAT, Eliran Wong',\n", " 'Converter': 'Adrian Negrea, Oliver Glanz', \n", " 'Source:':'https://github.com/eliranwong/LXX-Rahlfs-1935',\n", " 'Note':'?'}\n", "\n", "intFeatures = {'chapter', 'verse'}\n", "\n", "featureMeta = {\n", " 'orig_order': {'description': 'original word order in corpus'},\n", " 'book': {'description': 'book'},\n", " 'chapter': {'description': 'chapter'},\n", " 'verse': {'description': 'verse'},\n", " 'subverse': {'description': 'subverse'},\n", " 'word': {'description': 'text realized word'},\n", " 'lex_utf8': {'description': 'normalized word'},\n", " 'g_cons_utf8': {'description': 'word without accents'},\n", " 'translit_SBL': {'description': 'SBL transliteration'},\n", " 'lemma_gloss': {'description': 'English gloss'},\n", " 'strong': {'description': 'Strong numbers'},\n", " 'sp': {'description': 'part of speech'},\n", " 'morphology': {'description': 'morphology'},\n", " 'case': {'description': 'case'},\n", " 'nu': {'description': 'number'},\n", " 'gn': {'description': 'gender'},\n", " 'degree': {'description': 'degree'},\n", " 'tense': {'description': 'tense'},\n", " 'voice': {'description': 'voice'},\n", " 'mood': {'description': 'mood'},\n", " 'ps': {'description': 'person'},\n", " 'lemma_translit': {'description': 'lemma transliteration'},\n", " 'abc_order': {'description': 'dictionary order'},\n", " 'freq_lemma': {'description': 'frequency of word in corpus'},\n", " 'BOL_lexeme_dict': {'description': 'BOL 
dictionary form of lemma'},\n", " 'BOL_gloss': {'description': 'BOL English gloss'},\n", " \n", " # 'para': {'description': 'A paragraph number'},\n", " # 'ketiv': {'descrption': 'The text as it is written in the printed Tischendorf'},\n", " # 'qere': {'description': 'The text as the editor thinks it should have been'},\n", " # 'strongs': {'description': 'A word\\'s number in Strongs'},\n", " # 'str_lem': {'description': 'Word lemma that corresponds to The NEW Strong\\'sComplete Dictionary of Bible Words'},\n", " # 'anlex_lem': {'description': 'Word lemma that corresponds to Friberg, Friberg and Miller\\'s ANLEX'}\n", " }\n", "\n", "\n", "# configure metadata/output\n", "version = '1935'\n", "generic['Version'] = version\n", "\n", "# NOTE(review): `output` is not referenced again in this cell.\n", "output = os.path.join(output_dirs, version)\n", "\n", "print(f'Processing Version {version}')\n", "# NOTE(review): `output_dirs` is 'output' and has no {version} placeholder, so .format() returns it\n", "# unchanged and the TF files go to 'output' rather than a per-version folder - confirm this is intended.\n", "output_dir = output_dirs.format(version=version)\n", "\n", "# point Text-Fabric at the output location and wrap it in the walker converter\n", "TF = Fabric(locations=output_dir, silent=True)\n", "cv = CV(TF)\n", "\n", "# run the conversion: the walker calls `director` and applies the metadata dictionaries defined earlier in this cell\n", "cv.walk(director,\n", " slotType,\n", " otext=otext,\n", " generic=generic,\n", " intFeatures=intFeatures,\n", " featureMeta=featureMeta,\n", " warn=True,\n", " force=False,)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# First, I have to load the different modules that I use for analyzing the data and for plotting:\n", "import sys, os, collections\n", "import pandas as pd\n", "import numpy as np\n", "import re\n", "import csv\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt; plt.rcdefaults()\n", "from matplotlib.pyplot import figure\n", "from collections import Counter\n", "\n", "# Second, I have to load the Text Fabric app\n", "from tf.fabric import Fabric\n", "from tf.app import use" ] }, { "cell_type": "code", "execution_count": 133, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderbookchapterversesubversewordlex_utf8g_cons_utf8translit_SBLlemma_glossstrongspmorphologycasenugndegreetensevoicemoodps
01Gen11NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaN
12Gen11NaNἀρχῇἀρχήαρχηarchēorigin; beginningG746nounN.DSFDatSingFemNaNNaNNaNNaNNaN
23Gen11NaNἐποίησενποιέωποιεωepoiēsendo; makeG4160verbV.AAI3SNaNSingNaNNaNAorActInd3rd
34Gen11NaNοhotheG3588pronoun, articleRA.NSMNomSingMascNaNNaNNaNNaNNaN
45Gen11NaNθεὸςθεόςθεοςtheosGodG2316nounN.NSMNomSingMascNaNNaNNaNNaNNaN
56Gen11NaNτὸνοtontheG3588pronoun, articleRA.ASMAccSingMascNaNNaNNaNNaNNaN
67Gen11NaNοὐρανὸνοὐρανόςουρανοςouranonsky; heavenG3772nounN.ASMAccSingMascNaNNaNNaNNaNNaN
78Gen11NaNκαὶκαίκαιkaiand; evenG2532conjunctionCNaNNaNNaNNaNNaNNaNNaNNaN
89Gen11NaNτὴνοtēntheG3588pronoun, articleRA.ASFAccSingFemNaNNaNNaNNaNNaN
910Gen11NaNγῆνγῆγηgēnearth; landG1093nounN.ASFAccSingFemNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " orig_order book chapter verse subverse word lex_utf8 g_cons_utf8 \\\n", "0 1 Gen 1 1 NaN ἐν ἐν εν \n", "1 2 Gen 1 1 NaN ἀρχῇ ἀρχή αρχη \n", "2 3 Gen 1 1 NaN ἐποίησεν ποιέω ποιεω \n", "3 4 Gen 1 1 NaN ὁ ὁ ο \n", "4 5 Gen 1 1 NaN θεὸς θεός θεος \n", "5 6 Gen 1 1 NaN τὸν ὁ ο \n", "6 7 Gen 1 1 NaN οὐρανὸν οὐρανός ουρανος \n", "7 8 Gen 1 1 NaN καὶ καί και \n", "8 9 Gen 1 1 NaN τὴν ὁ ο \n", "9 10 Gen 1 1 NaN γῆν γῆ γη \n", "\n", " translit_SBL lemma_gloss strong sp morphology case \\\n", "0 en in G1722 preposition P NaN \n", "1 archē origin; beginning G746 noun N.DSF Dat \n", "2 epoiēsen do; make G4160 verb V.AAI3S NaN \n", "3 ho the G3588 pronoun, article RA.NSM Nom \n", "4 theos God G2316 noun N.NSM Nom \n", "5 ton the G3588 pronoun, article RA.ASM Acc \n", "6 ouranon sky; heaven G3772 noun N.ASM Acc \n", "7 kai and; even G2532 conjunction C NaN \n", "8 tēn the G3588 pronoun, article RA.ASF Acc \n", "9 gēn earth; land G1093 noun N.ASF Acc \n", "\n", " nu gn degree tense voice mood ps \n", "0 NaN NaN NaN NaN NaN NaN NaN \n", "1 Sing Fem NaN NaN NaN NaN NaN \n", "2 Sing NaN NaN Aor Act Ind 3rd \n", "3 Sing Masc NaN NaN NaN NaN NaN \n", "4 Sing Masc NaN NaN NaN NaN NaN \n", "5 Sing Masc NaN NaN NaN NaN NaN \n", "6 Sing Masc NaN NaN NaN NaN NaN \n", "7 NaN NaN NaN NaN NaN NaN NaN \n", "8 Sing Fem NaN NaN NaN NaN NaN \n", "9 Sing Fem NaN NaN NaN NaN NaN " ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureadd=pd.read_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_CCATLXX/LXX_source_v1.3.xlsx',sheet_name='FULL_data')\n", "pd.set_option('display.max_columns', 50)\n", "featureadd.head(10)" ] }, { "cell_type": "code", "execution_count": 134, "metadata": {}, "outputs": [], "source": [ "from unidecode import unidecode" ] }, { "cell_type": "code", "execution_count": 135, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderbookchapterversesubversewordlex_utf8g_cons_utf8translit_SBLlemma_glossstrongspmorphologycasenugndegreetensevoicemoodpslemma_translit
01Gen11NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen
12Gen11NaNἀρχῇἀρχήαρχηarchēorigin; beginningG746nounN.DSFDatSingFemNaNNaNNaNNaNNaNarkhe
23Gen11NaNἐποίησενποιέωποιεωepoiēsendo; makeG4160verbV.AAI3SNaNSingNaNNaNAorActInd3rdpoieo
34Gen11NaNοhotheG3588pronoun, articleRA.NSMNomSingMascNaNNaNNaNNaNNaNo
45Gen11NaNθεὸςθεόςθεοςtheosGodG2316nounN.NSMNomSingMascNaNNaNNaNNaNNaNtheos
\n", "
" ], "text/plain": [ " orig_order book chapter verse subverse word lex_utf8 g_cons_utf8 \\\n", "0 1 Gen 1 1 NaN ἐν ἐν εν \n", "1 2 Gen 1 1 NaN ἀρχῇ ἀρχή αρχη \n", "2 3 Gen 1 1 NaN ἐποίησεν ποιέω ποιεω \n", "3 4 Gen 1 1 NaN ὁ ὁ ο \n", "4 5 Gen 1 1 NaN θεὸς θεός θεος \n", "\n", " translit_SBL lemma_gloss strong sp morphology case \\\n", "0 en in G1722 preposition P NaN \n", "1 archē origin; beginning G746 noun N.DSF Dat \n", "2 epoiēsen do; make G4160 verb V.AAI3S NaN \n", "3 ho the G3588 pronoun, article RA.NSM Nom \n", "4 theos God G2316 noun N.NSM Nom \n", "\n", " nu gn degree tense voice mood ps lemma_translit \n", "0 NaN NaN NaN NaN NaN NaN NaN en \n", "1 Sing Fem NaN NaN NaN NaN NaN arkhe \n", "2 Sing NaN NaN Aor Act Ind 3rd poieo \n", "3 Sing Masc NaN NaN NaN NaN NaN o \n", "4 Sing Masc NaN NaN NaN NaN NaN theos " ] }, "execution_count": 135, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureadd['lemma_translit']=featureadd['lex_utf8'].apply(unidecode)\n", "featureadd.head(5)" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderlex_utf8
01ἐν
12ἀρχή
23ποιέω
34
45θεός
\n", "
" ], "text/plain": [ " orig_order lex_utf8\n", "0 1 ἐν\n", "1 2 ἀρχή\n", "2 3 ποιέω\n", "3 4 ὁ\n", "4 5 θεός" ] }, "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABC1=featureadd[['orig_order','lex_utf8']]\n", "ABC1.head(5)" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_order
count623693.000000
mean311847.000000
std180044.805057
min1.000000
25%155924.000000
50%311847.000000
75%467770.000000
max623693.000000
\n", "
" ], "text/plain": [ " orig_order\n", "count 623693.000000\n", "mean 311847.000000\n", "std 180044.805057\n", "min 1.000000\n", "25% 155924.000000\n", "50% 311847.000000\n", "75% 467770.000000\n", "max 623693.000000" ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABC1.describe()" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderlex_utf8
141279141280Ααλαφ
256210256211Ααρα
302080302081Αβαδια
296208296209Αβαδιας
256426256427Αβαδων
274105274106Αβαιαν
149542149543Αβαισαν
598778598779Αβαλ
579703579704Αβαμα
236283236284Αβανα
\n", "
" ], "text/plain": [ " orig_order lex_utf8\n", "141279 141280 Ααλαφ\n", "256210 256211 Ααρα\n", "302080 302081 Αβαδια\n", "296208 296209 Αβαδιας\n", "256426 256427 Αβαδων\n", "274105 274106 Αβαιαν\n", "149542 149543 Αβαισαν\n", "598778 598779 Αβαλ\n", "579703 579704 Αβαμα\n", "236283 236284 Αβανα" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABCdict = ABC1.drop_duplicates(['lex_utf8']).sort_values(by='lex_utf8', ascending=[True])\n", "ABCdict.head(10)" ] }, { "cell_type": "code", "execution_count": 139, "metadata": {}, "outputs": [], "source": [ "ABCdict.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_CCATLXX/feature-dev/ABC1order.xlsx')\n" ] }, { "cell_type": "code", "execution_count": 140, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderlex_utf8abc_order
01ἐν4638
12ἀρχή1835
23ποιέω10580
349434
45θεός6191
57οὐρανός9842
68καί7030
710γῆ3082
812δέ3302
914εἰμί4092
\n", "
" ], "text/plain": [ " orig_order lex_utf8 abc_order\n", "0 1 ἐν 4638\n", "1 2 ἀρχή 1835\n", "2 3 ποιέω 10580\n", "3 4 ὁ 9434\n", "4 5 θεός 6191\n", "5 7 οὐρανός 9842\n", "6 8 καί 7030\n", "7 10 γῆ 3082\n", "8 12 δέ 3302\n", "9 14 εἰμί 4092" ] }, "execution_count": 140, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABC2=pd.read_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_CCATLXX/feature-dev/ABC2order.xlsx')\n", "pd.set_option('display.max_columns', 50)\n", "ABC2.head(10)" ] }, { "cell_type": "code", "execution_count": 141, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
lex_utf8abc_order
0ἐν4638
1ἀρχή1835
2ποιέω10580
39434
4θεός6191
\n", "
" ], "text/plain": [ " lex_utf8 abc_order\n", "0 ἐν 4638\n", "1 ἀρχή 1835\n", "2 ποιέω 10580\n", "3 ὁ 9434\n", "4 θεός 6191" ] }, "execution_count": 141, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABC2=ABC2.drop(['orig_order'], axis=1)\n", "ABC2.head()" ] }, { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderchapterverse
count623693.000000623693.000000623693.000000
mean311847.00000019.20050116.949878
std180044.80505720.67968113.994139
min1.0000000.0000000.000000
25%155924.0000006.0000007.000000
50%311847.00000013.00000014.000000
75%467770.00000025.00000023.000000
max623693.000000151.000000176.000000
\n", "
" ], "text/plain": [ " orig_order chapter verse\n", "count 623693.000000 623693.000000 623693.000000\n", "mean 311847.000000 19.200501 16.949878\n", "std 180044.805057 20.679681 13.994139\n", "min 1.000000 0.000000 0.000000\n", "25% 155924.000000 6.000000 7.000000\n", "50% 311847.000000 13.000000 14.000000\n", "75% 467770.000000 25.000000 23.000000\n", "max 623693.000000 151.000000 176.000000" ] }, "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureadd.describe()" ] }, { "cell_type": "code", "execution_count": 143, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderbookchapterversesubversewordlex_utf8g_cons_utf8translit_SBLlemma_glossstrongspmorphologycasenugndegreetensevoicemoodpslemma_translitabc_order
01Gen11NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen4638
186Gen16NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen4638
2232Gen111NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen4638
3264Gen112NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen4638
4291Gen114NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen4638
\n", "
" ], "text/plain": [ " orig_order book chapter verse subverse word lex_utf8 g_cons_utf8 \\\n", "0 1 Gen 1 1 NaN ἐν ἐν εν \n", "1 86 Gen 1 6 NaN ἐν ἐν εν \n", "2 232 Gen 1 11 NaN ἐν ἐν εν \n", "3 264 Gen 1 12 NaN ἐν ἐν εν \n", "4 291 Gen 1 14 NaN ἐν ἐν εν \n", "\n", " translit_SBL lemma_gloss strong sp morphology case nu gn \\\n", "0 en in G1722 preposition P NaN NaN NaN \n", "1 en in G1722 preposition P NaN NaN NaN \n", "2 en in G1722 preposition P NaN NaN NaN \n", "3 en in G1722 preposition P NaN NaN NaN \n", "4 en in G1722 preposition P NaN NaN NaN \n", "\n", " degree tense voice mood ps lemma_translit abc_order \n", "0 NaN NaN NaN NaN NaN en 4638 \n", "1 NaN NaN NaN NaN NaN en 4638 \n", "2 NaN NaN NaN NaN NaN en 4638 \n", "3 NaN NaN NaN NaN NaN en 4638 \n", "4 NaN NaN NaN NaN NaN en 4638 " ] }, "execution_count": 143, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureadd=pd.merge (featureadd, ABC2,\n", " on='lex_utf8',\n", " how='outer')\n", "featureadd.head(5)" ] }, { "cell_type": "code", "execution_count": 144, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderchapterverseabc_order
count623693.000000623693.000000623693.000000623693.000000
mean311847.00000019.20050116.9498787261.430627
std180044.80505820.67968113.9941393447.810620
min1.0000000.0000000.0000001.000000
25%155924.0000006.0000007.0000004421.000000
50%311847.00000013.00000014.0000007096.000000
75%467770.00000025.00000023.0000009456.000000
max623693.000000151.000000176.00000014174.000000
\n", "
" ], "text/plain": [ " orig_order chapter verse abc_order\n", "count 623693.000000 623693.000000 623693.000000 623693.000000\n", "mean 311847.000000 19.200501 16.949878 7261.430627\n", "std 180044.805058 20.679681 13.994139 3447.810620\n", "min 1.000000 0.000000 0.000000 1.000000\n", "25% 155924.000000 6.000000 7.000000 4421.000000\n", "50% 311847.000000 13.000000 14.000000 7096.000000\n", "75% 467770.000000 25.000000 23.000000 9456.000000\n", "max 623693.000000 151.000000 176.000000 14174.000000" ] }, "execution_count": 144, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureadd.describe()" ] }, { "cell_type": "code", "execution_count": 159, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderbookchapterversesubversewordlex_utf8g_cons_utf8translit_SBLlemma_glossstrongspmorphologycasenugndegreetensevoicemoodpslemma_translitabc_orderfreq_lemma
01Gen11NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
186Gen16NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
2232Gen111NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
3264Gen112NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
4291Gen114NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
\n", "
" ], "text/plain": [ " orig_order book chapter verse subverse word lex_utf8 g_cons_utf8 \\\n", "0 1 Gen 1 1 NaN ἐν ἐν εν \n", "1 86 Gen 1 6 NaN ἐν ἐν εν \n", "2 232 Gen 1 11 NaN ἐν ἐν εν \n", "3 264 Gen 1 12 NaN ἐν ἐν εν \n", "4 291 Gen 1 14 NaN ἐν ἐν εν \n", "\n", " translit_SBL lemma_gloss strong sp morphology case nu gn \\\n", "0 en in G1722 preposition P NaN NaN NaN \n", "1 en in G1722 preposition P NaN NaN NaN \n", "2 en in G1722 preposition P NaN NaN NaN \n", "3 en in G1722 preposition P NaN NaN NaN \n", "4 en in G1722 preposition P NaN NaN NaN \n", "\n", " degree tense voice mood ps lemma_translit abc_order freq_lemma \n", "0 NaN NaN NaN NaN NaN en 4638 14316 \n", "1 NaN NaN NaN NaN NaN en 4638 14316 \n", "2 NaN NaN NaN NaN NaN en 4638 14316 \n", "3 NaN NaN NaN NaN NaN en 4638 14316 \n", "4 NaN NaN NaN NaN NaN en 4638 14316 " ] }, "execution_count": 159, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Stage 2 starts from an independent copy of featureadd: a plain assignment would only\n", "# alias the same DataFrame, so columns added to featureaddstage2 in later cells\n", "# (e.g. freq_lemma) would silently appear in featureadd as well.\n", "featureaddstage2 = featureadd.copy()\n", "featureaddstage2.head(5)" ] }, { "cell_type": "code", "execution_count": 160, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderchapterverseabc_orderfreq_lemma
count623693.000000623693.000000623693.000000623693.000000623693.000000
mean311847.00000019.20050116.9498787261.43062721743.617390
std180044.80505820.67968113.9941393447.81062032782.706194
min1.0000000.0000000.0000001.0000001.000000
25%155924.0000006.0000007.0000004421.000000244.000000
50%311847.00000013.00000014.0000007096.0000002522.000000
75%467770.00000025.00000023.0000009456.00000029396.000000
max623693.000000151.000000176.00000014174.00000088444.000000
\n", "
" ], "text/plain": [ " orig_order chapter verse abc_order \\\n", "count 623693.000000 623693.000000 623693.000000 623693.000000 \n", "mean 311847.000000 19.200501 16.949878 7261.430627 \n", "std 180044.805058 20.679681 13.994139 3447.810620 \n", "min 1.000000 0.000000 0.000000 1.000000 \n", "25% 155924.000000 6.000000 7.000000 4421.000000 \n", "50% 311847.000000 13.000000 14.000000 7096.000000 \n", "75% 467770.000000 25.000000 23.000000 9456.000000 \n", "max 623693.000000 151.000000 176.000000 14174.000000 \n", "\n", " freq_lemma \n", "count 623693.000000 \n", "mean 21743.617390 \n", "std 32782.706194 \n", "min 1.000000 \n", "25% 244.000000 \n", "50% 2522.000000 \n", "75% 29396.000000 \n", "max 88444.000000 " ] }, "execution_count": 160, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureaddstage2.describe()" ] }, { "cell_type": "code", "execution_count": 161, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderbookchapterversesubversewordlex_utf8g_cons_utf8translit_SBLlemma_glossstrongspmorphologycasenugndegreetensevoicemoodpslemma_translitabc_orderfreq_lemma
01Gen11NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
186Gen16NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
2232Gen111NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
3264Gen112NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
4291Gen114NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
\n", "
" ], "text/plain": [ " orig_order book chapter verse subverse word lex_utf8 g_cons_utf8 \\\n", "0 1 Gen 1 1 NaN ἐν ἐν εν \n", "1 86 Gen 1 6 NaN ἐν ἐν εν \n", "2 232 Gen 1 11 NaN ἐν ἐν εν \n", "3 264 Gen 1 12 NaN ἐν ἐν εν \n", "4 291 Gen 1 14 NaN ἐν ἐν εν \n", "\n", " translit_SBL lemma_gloss strong sp morphology case nu gn \\\n", "0 en in G1722 preposition P NaN NaN NaN \n", "1 en in G1722 preposition P NaN NaN NaN \n", "2 en in G1722 preposition P NaN NaN NaN \n", "3 en in G1722 preposition P NaN NaN NaN \n", "4 en in G1722 preposition P NaN NaN NaN \n", "\n", " degree tense voice mood ps lemma_translit abc_order freq_lemma \n", "0 NaN NaN NaN NaN NaN en 4638 14316 \n", "1 NaN NaN NaN NaN NaN en 4638 14316 \n", "2 NaN NaN NaN NaN NaN en 4638 14316 \n", "3 NaN NaN NaN NaN NaN en 4638 14316 \n", "4 NaN NaN NaN NaN NaN en 4638 14316 " ] }, "execution_count": 161, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureaddstage2[\"freq_lemma\"]=featureaddstage2.groupby([\"lex_utf8\"])[\"lex_utf8\"].transform(\"count\")\n", "featureaddstage2.head(5)" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderchapterverseabc_orderfreq_lemma
count623693.000000623693.000000623693.000000623693.000000623693.000000
mean311847.00000019.20050116.9498787261.43062721743.617390
std180044.80505820.67968113.9941393447.81062032782.706194
min1.0000000.0000000.0000001.0000001.000000
25%155924.0000006.0000007.0000004421.000000244.000000
50%311847.00000013.00000014.0000007096.0000002522.000000
75%467770.00000025.00000023.0000009456.00000029396.000000
max623693.000000151.000000176.00000014174.00000088444.000000
\n", "
" ], "text/plain": [ " orig_order chapter verse abc_order \\\n", "count 623693.000000 623693.000000 623693.000000 623693.000000 \n", "mean 311847.000000 19.200501 16.949878 7261.430627 \n", "std 180044.805058 20.679681 13.994139 3447.810620 \n", "min 1.000000 0.000000 0.000000 1.000000 \n", "25% 155924.000000 6.000000 7.000000 4421.000000 \n", "50% 311847.000000 13.000000 14.000000 7096.000000 \n", "75% 467770.000000 25.000000 23.000000 9456.000000 \n", "max 623693.000000 151.000000 176.000000 14174.000000 \n", "\n", " freq_lemma \n", "count 623693.000000 \n", "mean 21743.617390 \n", "std 32782.706194 \n", "min 1.000000 \n", "25% 244.000000 \n", "50% 2522.000000 \n", "75% 29396.000000 \n", "max 88444.000000 " ] }, "execution_count": 162, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureaddstage2.describe()" ] }, { "cell_type": "code", "execution_count": 163, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderbookchapterversesubversewordlex_utf8g_cons_utf8translit_SBLlemma_glossstrongspmorphologycasenugndegreetensevoicemoodpslemma_translitabc_orderfreq_lemma
01Gen11NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316
143162Gen11NaNἀρχῇἀρχήαρχηarchēorigin; beginningG746nounN.DSFDatSingFemNaNNaNNaNNaNNaNarkhe1835236
145523Gen11NaNἐποίησενποιέωποιεωepoiēsendo; makeG4160verbV.AAI3SNaNSingNaNNaNAorActInd3rdpoieo105803386
179384Gen11NaNοhotheG3588pronoun, articleRA.NSMNomSingMascNaNNaNNaNNaNNaNo943488444
1063825Gen11NaNθεὸςθεόςθεοςtheosGodG2316nounN.NSMNomSingMascNaNNaNNaNNaNNaNtheos61914009
179396Gen11NaNτὸνοtontheG3588pronoun, articleRA.ASMAccSingMascNaNNaNNaNNaNNaNo943488444
1103917Gen11NaNοὐρανὸνοὐρανόςουρανοςouranonsky; heavenG3772nounN.ASMAccSingMascNaNNaNNaNNaNNaNouranos9842682
1110738Gen11NaNκαὶκαίκαιkaiand; evenG2532conjunctionCNaNNaNNaNNaNNaNNaNNaNNaNkai703062231
179409Gen11NaNτὴνοtēntheG3588pronoun, articleRA.ASFAccSingFemNaNNaNNaNNaNNaNo943488444
17330410Gen11NaNγῆνγῆγηgēnearth; landG1093nounN.ASFAccSingFemNaNNaNNaNNaNNaNge30823173
\n", "
" ], "text/plain": [ " orig_order book chapter verse subverse word lex_utf8 \\\n", "0 1 Gen 1 1 NaN ἐν ἐν \n", "14316 2 Gen 1 1 NaN ἀρχῇ ἀρχή \n", "14552 3 Gen 1 1 NaN ἐποίησεν ποιέω \n", "17938 4 Gen 1 1 NaN ὁ ὁ \n", "106382 5 Gen 1 1 NaN θεὸς θεός \n", "17939 6 Gen 1 1 NaN τὸν ὁ \n", "110391 7 Gen 1 1 NaN οὐρανὸν οὐρανός \n", "111073 8 Gen 1 1 NaN καὶ καί \n", "17940 9 Gen 1 1 NaN τὴν ὁ \n", "173304 10 Gen 1 1 NaN γῆν γῆ \n", "\n", " g_cons_utf8 translit_SBL lemma_gloss strong sp \\\n", "0 εν en in G1722 preposition \n", "14316 αρχη archē origin; beginning G746 noun \n", "14552 ποιεω epoiēsen do; make G4160 verb \n", "17938 ο ho the G3588 pronoun, article \n", "106382 θεος theos God G2316 noun \n", "17939 ο ton the G3588 pronoun, article \n", "110391 ουρανος ouranon sky; heaven G3772 noun \n", "111073 και kai and; even G2532 conjunction \n", "17940 ο tēn the G3588 pronoun, article \n", "173304 γη gēn earth; land G1093 noun \n", "\n", " morphology case nu gn degree tense voice mood ps \\\n", "0 P NaN NaN NaN NaN NaN NaN NaN NaN \n", "14316 N.DSF Dat Sing Fem NaN NaN NaN NaN NaN \n", "14552 V.AAI3S NaN Sing NaN NaN Aor Act Ind 3rd \n", "17938 RA.NSM Nom Sing Masc NaN NaN NaN NaN NaN \n", "106382 N.NSM Nom Sing Masc NaN NaN NaN NaN NaN \n", "17939 RA.ASM Acc Sing Masc NaN NaN NaN NaN NaN \n", "110391 N.ASM Acc Sing Masc NaN NaN NaN NaN NaN \n", "111073 C NaN NaN NaN NaN NaN NaN NaN NaN \n", "17940 RA.ASF Acc Sing Fem NaN NaN NaN NaN NaN \n", "173304 N.ASF Acc Sing Fem NaN NaN NaN NaN NaN \n", "\n", " lemma_translit abc_order freq_lemma \n", "0 en 4638 14316 \n", "14316 arkhe 1835 236 \n", "14552 poieo 10580 3386 \n", "17938 o 9434 88444 \n", "106382 theos 6191 4009 \n", "17939 o 9434 88444 \n", "110391 ouranos 9842 682 \n", "111073 kai 7030 62231 \n", "17940 o 9434 88444 \n", "173304 ge 3082 3173 " ] }, "execution_count": 163, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureaddstage2.sort_values(['orig_order'], ascending=True).head(10)" ] }, 
{ "cell_type": "code", "execution_count": 164, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderchapterverseabc_orderfreq_lemma
count623693.000000623693.000000623693.000000623693.000000623693.000000
mean311847.00000019.20050116.9498787261.43062721743.617390
std180044.80505820.67968113.9941393447.81062032782.706194
min1.0000000.0000000.0000001.0000001.000000
25%155924.0000006.0000007.0000004421.000000244.000000
50%311847.00000013.00000014.0000007096.0000002522.000000
75%467770.00000025.00000023.0000009456.00000029396.000000
max623693.000000151.000000176.00000014174.00000088444.000000
\n", "
" ], "text/plain": [ " orig_order chapter verse abc_order \\\n", "count 623693.000000 623693.000000 623693.000000 623693.000000 \n", "mean 311847.000000 19.200501 16.949878 7261.430627 \n", "std 180044.805058 20.679681 13.994139 3447.810620 \n", "min 1.000000 0.000000 0.000000 1.000000 \n", "25% 155924.000000 6.000000 7.000000 4421.000000 \n", "50% 311847.000000 13.000000 14.000000 7096.000000 \n", "75% 467770.000000 25.000000 23.000000 9456.000000 \n", "max 623693.000000 151.000000 176.000000 14174.000000 \n", "\n", " freq_lemma \n", "count 623693.000000 \n", "mean 21743.617390 \n", "std 32782.706194 \n", "min 1.000000 \n", "25% 244.000000 \n", "50% 2522.000000 \n", "75% 29396.000000 \n", "max 88444.000000 " ] }, "execution_count": 164, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureaddstage2.describe()" ] }, { "cell_type": "code", "execution_count": 165, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig abc orderOccurrencesLexemeorig abc order.1Lexeme_dictStrong's numberStrong's unreliable?gloss
015Ἀαρών1Ἀαρών, ὁ2noAaron
121Ἀβαδδών2Ἀβαδδών, ὁ3noAbaddon
231ἀβαρής3ἀβαρής, -ές4nonot burdensome
343ἀββά4ἀββά, ὁ5noFather
454Ἅβελ5Ἅβελ, ὁ6noAbel
563Ἀβιά6Ἀβιά, ὁ7noAbijah
671Ἀβιαθάρ7Ἀβιαθάρ, ὁ8noAbiathar
781Ἀβιληνή8Ἀβιληνή, -ῆς, ἡ9noAbilene
892Ἀβιούδ9Ἀβιούδ, ὁ10noAbiud
91073Ἀβραάμ10Ἀβραάμ, ὁ11noAbraham
\n", "
" ], "text/plain": [ " orig abc order Occurrences Lexeme orig abc order.1 Lexeme_dict \\\n", "0 1 5 Ἀαρών 1 Ἀαρών, ὁ \n", "1 2 1 Ἀβαδδών 2 Ἀβαδδών, ὁ \n", "2 3 1 ἀβαρής 3 ἀβαρής, -ές \n", "3 4 3 ἀββά 4 ἀββά, ὁ \n", "4 5 4 Ἅβελ 5 Ἅβελ, ὁ \n", "5 6 3 Ἀβιά 6 Ἀβιά, ὁ \n", "6 7 1 Ἀβιαθάρ 7 Ἀβιαθάρ, ὁ \n", "7 8 1 Ἀβιληνή 8 Ἀβιληνή, -ῆς, ἡ \n", "8 9 2 Ἀβιούδ 9 Ἀβιούδ, ὁ \n", "9 10 73 Ἀβραάμ 10 Ἀβραάμ, ὁ \n", "\n", " Strong's number Strong's unreliable? gloss \n", "0 2 no Aaron \n", "1 3 no Abaddon \n", "2 4 no not burdensome \n", "3 5 no Father \n", "4 6 no Abel \n", "5 7 no Abijah \n", "6 8 no Abiathar \n", "7 9 no Abilene \n", "8 10 no Abiud \n", "9 11 no Abraham " ] }, "execution_count": 165, "metadata": {}, "output_type": "execute_result" } ], "source": [ "BOLgreekDICT=pd.read_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/NA1904_dictionary_v1.0.xlsx')\n", "pd.set_option('display.max_columns', 50)\n", "BOLgreekDICT.head(10)" ] }, { "cell_type": "code", "execution_count": 166, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LexemeLexeme_dictgloss
0ἈαρώνἈαρών, ὁAaron
1ἈβαδδώνἈβαδδών, ὁAbaddon
2ἀβαρήςἀβαρής, -έςnot burdensome
3ἀββάἀββά, ὁFather
4ἍβελἍβελ, ὁAbel
5ἈβιάἈβιά, ὁAbijah
6ἈβιαθάρἈβιαθάρ, ὁAbiathar
7ἈβιληνήἈβιληνή, -ῆς, ἡAbilene
8ἈβιούδἈβιούδ, ὁAbiud
9ἈβραάμἈβραάμ, ὁAbraham
\n", "
" ], "text/plain": [ " Lexeme Lexeme_dict gloss\n", "0 Ἀαρών Ἀαρών, ὁ Aaron\n", "1 Ἀβαδδών Ἀβαδδών, ὁ Abaddon\n", "2 ἀβαρής ἀβαρής, -ές not burdensome\n", "3 ἀββά ἀββά, ὁ Father\n", "4 Ἅβελ Ἅβελ, ὁ Abel\n", "5 Ἀβιά Ἀβιά, ὁ Abijah\n", "6 Ἀβιαθάρ Ἀβιαθάρ, ὁ Abiathar\n", "7 Ἀβιληνή Ἀβιληνή, -ῆς, ἡ Abilene\n", "8 Ἀβιούδ Ἀβιούδ, ὁ Abiud\n", "9 Ἀβραάμ Ἀβραάμ, ὁ Abraham" ] }, "execution_count": 166, "metadata": {}, "output_type": "execute_result" } ], "source": [ "BOLgreekDICT=BOLgreekDICT[['Lexeme','Lexeme_dict', 'gloss']]\n", "BOLgreekDICT.head(10)" ] }, { "cell_type": "code", "execution_count": 174, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
lex_utf8BOL_lexeme_dictBOL_gloss
0ἈαρώνἈαρών, ὁAaron
1ἈβαδδώνἈβαδδών, ὁAbaddon
2ἀβαρήςἀβαρής, -έςnot burdensome
3ἀββάἀββά, ὁFather
4ἍβελἍβελ, ὁAbel
\n", "
" ], "text/plain": [ " lex_utf8 BOL_lexeme_dict BOL_gloss\n", "0 Ἀαρών Ἀαρών, ὁ Aaron\n", "1 Ἀβαδδών Ἀβαδδών, ὁ Abaddon\n", "2 ἀβαρής ἀβαρής, -ές not burdensome\n", "3 ἀββά ἀββά, ὁ Father\n", "4 Ἅβελ Ἅβελ, ὁ Abel" ] }, "execution_count": 174, "metadata": {}, "output_type": "execute_result" } ], "source": [ "BOLgreekDICT = BOLgreekDICT.rename({'Lexeme':'lex_utf8', 'Lexeme_dict':'BOL_lexeme_dict', 'gloss':'BOL_gloss'}, axis=1)\n", "BOLgreekDICT.head(5)" ] }, { "cell_type": "code", "execution_count": 175, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
lex_utf8BOL_lexeme_dictBOL_gloss
count543353355433
unique540052975172
topταχύςταχύς, -εῖαleave behind
freq444
\n", "
" ], "text/plain": [ " lex_utf8 BOL_lexeme_dict BOL_gloss\n", "count 5433 5335 5433\n", "unique 5400 5297 5172\n", "top ταχύς ταχύς, -εῖα leave behind\n", "freq 4 4 4" ] }, "execution_count": 175, "metadata": {}, "output_type": "execute_result" } ], "source": [ "BOLgreekDICT.describe()" ] }, { "cell_type": "code", "execution_count": 176, "metadata": {}, "outputs": [], "source": [ "# Stage 3 is an independent copy of stage 2 (not an alias), so each stage name refers\n", "# to its own DataFrame and re-running later cells cannot alter earlier stages.\n", "featureaddstage3=featureaddstage2.copy()" ] }, { "cell_type": "code", "execution_count": 177, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderchapterverseabc_orderfreq_lemma
count623693.000000623693.000000623693.000000623693.000000623693.000000
mean311847.00000019.20050116.9498787261.43062721743.617390
std180044.80505820.67968113.9941393447.81062032782.706194
min1.0000000.0000000.0000001.0000001.000000
25%155924.0000006.0000007.0000004421.000000244.000000
50%311847.00000013.00000014.0000007096.0000002522.000000
75%467770.00000025.00000023.0000009456.00000029396.000000
max623693.000000151.000000176.00000014174.00000088444.000000
\n", "
" ], "text/plain": [ " orig_order chapter verse abc_order \\\n", "count 623693.000000 623693.000000 623693.000000 623693.000000 \n", "mean 311847.000000 19.200501 16.949878 7261.430627 \n", "std 180044.805058 20.679681 13.994139 3447.810620 \n", "min 1.000000 0.000000 0.000000 1.000000 \n", "25% 155924.000000 6.000000 7.000000 4421.000000 \n", "50% 311847.000000 13.000000 14.000000 7096.000000 \n", "75% 467770.000000 25.000000 23.000000 9456.000000 \n", "max 623693.000000 151.000000 176.000000 14174.000000 \n", "\n", " freq_lemma \n", "count 623693.000000 \n", "mean 21743.617390 \n", "std 32782.706194 \n", "min 1.000000 \n", "25% 244.000000 \n", "50% 2522.000000 \n", "75% 29396.000000 \n", "max 88444.000000 " ] }, "execution_count": 177, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureaddstage3.describe()" ] }, { "cell_type": "code", "execution_count": 178, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderbookchapterversesubversewordlex_utf8g_cons_utf8translit_SBLlemma_glossstrongspmorphologycasenugndegreetensevoicemoodpslemma_translitabc_orderfreq_lemmaBOL_lexeme_dictBOL_gloss
01Gen11NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316ἐνin, on, among
186Gen16NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316ἐνin, on, among
2232Gen111NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316ἐνin, on, among
3264Gen112NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316ἐνin, on, among
4291Gen114NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316ἐνin, on, among
\n", "
" ], "text/plain": [ " orig_order book chapter verse subverse word lex_utf8 g_cons_utf8 \\\n", "0 1 Gen 1 1 NaN ἐν ἐν εν \n", "1 86 Gen 1 6 NaN ἐν ἐν εν \n", "2 232 Gen 1 11 NaN ἐν ἐν εν \n", "3 264 Gen 1 12 NaN ἐν ἐν εν \n", "4 291 Gen 1 14 NaN ἐν ἐν εν \n", "\n", " translit_SBL lemma_gloss strong sp morphology case nu gn \\\n", "0 en in G1722 preposition P NaN NaN NaN \n", "1 en in G1722 preposition P NaN NaN NaN \n", "2 en in G1722 preposition P NaN NaN NaN \n", "3 en in G1722 preposition P NaN NaN NaN \n", "4 en in G1722 preposition P NaN NaN NaN \n", "\n", " degree tense voice mood ps lemma_translit abc_order freq_lemma \\\n", "0 NaN NaN NaN NaN NaN en 4638 14316 \n", "1 NaN NaN NaN NaN NaN en 4638 14316 \n", "2 NaN NaN NaN NaN NaN en 4638 14316 \n", "3 NaN NaN NaN NaN NaN en 4638 14316 \n", "4 NaN NaN NaN NaN NaN en 4638 14316 \n", "\n", " BOL_lexeme_dict BOL_gloss \n", "0 ἐν in, on, among \n", "1 ἐν in, on, among \n", "2 ἐν in, on, among \n", "3 ἐν in, on, among \n", "4 ἐν in, on, among " ] }, "execution_count": 178, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# BOLgreekDICT lists some lexemes more than once (5433 rows but only 5400 unique lex_utf8),\n", "# so merging it as-is repeats word rows: 623693 rows before the merge, 628353 after.\n", "# Keep a single dictionary entry per lexeme so the merge only adds columns.\n", "# NOTE(review): keep='first' is an arbitrary tie-break between repeated entries - confirm.\n", "BOLgreekDICT_unique = BOLgreekDICT.drop_duplicates(subset='lex_utf8', keep='first')\n", "featureaddstage4=pd.merge (featureaddstage3, BOLgreekDICT_unique,\n", " on='lex_utf8',\n", " how='left',\n", " validate='many_to_one')\n", "# Every word row must survive exactly once; fail loudly if the lookup ever duplicates rows again.\n", "assert len(featureaddstage4) == len(featureaddstage3), 'merge changed the number of word rows'\n", "featureaddstage4.head(5)" ] }, { "cell_type": "code", "execution_count": 180, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderchapterverseabc_orderfreq_lemma
count628353.000000628353.000000628353.000000628353.000000628353.000000
mean312013.17808919.18089216.9497487267.01674921585.945001
std179969.15737420.65819513.9947913445.96857732711.827142
min1.0000000.0000000.0000001.0000001.000000
25%156202.0000006.0000007.0000004425.000000243.000000
50%312320.00000013.00000014.0000007103.0000002223.000000
75%467690.00000025.00000023.0000009497.00000029396.000000
max623693.000000151.000000176.00000014174.00000088444.000000
\n", "
" ], "text/plain": [ " orig_order chapter verse abc_order \\\n", "count 628353.000000 628353.000000 628353.000000 628353.000000 \n", "mean 312013.178089 19.180892 16.949748 7267.016749 \n", "std 179969.157374 20.658195 13.994791 3445.968577 \n", "min 1.000000 0.000000 0.000000 1.000000 \n", "25% 156202.000000 6.000000 7.000000 4425.000000 \n", "50% 312320.000000 13.000000 14.000000 7103.000000 \n", "75% 467690.000000 25.000000 23.000000 9497.000000 \n", "max 623693.000000 151.000000 176.000000 14174.000000 \n", "\n", " freq_lemma \n", "count 628353.000000 \n", "mean 21585.945001 \n", "std 32711.827142 \n", "min 1.000000 \n", "25% 243.000000 \n", "50% 2223.000000 \n", "75% 29396.000000 \n", "max 88444.000000 " ] }, "execution_count": 180, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureaddstage4.describe()" ] }, { "cell_type": "code", "execution_count": 181, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderbookchapterversesubversewordlex_utf8g_cons_utf8translit_SBLlemma_glossstrongspmorphologycasenugndegreetensevoicemoodpslemma_translitabc_orderfreq_lemmaBOL_lexeme_dictBOL_gloss
01Gen11NaNἐνἐνενeninG1722prepositionPNaNNaNNaNNaNNaNNaNNaNNaNen463814316ἐνin, on, among
143162Gen11NaNἀρχῇἀρχήαρχηarchēorigin; beginningG746nounN.DSFDatSingFemNaNNaNNaNNaNNaNarkhe1835236ἀρχή, -ῆς, ἡruler, beginning
145523Gen11NaNἐποίησενποιέωποιεωepoiēsendo; makeG4160verbV.AAI3SNaNSingNaNNaNAorActInd3rdpoieo105803386ποιέωdo, make
179384Gen11NaNοhotheG3588pronoun, articleRA.NSMNomSingMascNaNNaNNaNNaNNaNo943488444ὁ, ἡ, τόthe
1063825Gen11NaNθεὸςθεόςθεοςtheosGodG2316nounN.NSMNomSingMascNaNNaNNaNNaNNaNtheos61914009θεός, -οῦ, ὁGod, god
179396Gen11NaNτὸνοtontheG3588pronoun, articleRA.ASMAccSingMascNaNNaNNaNNaNNaNo943488444ὁ, ἡ, τόthe
1103917Gen11NaNοὐρανὸνοὐρανόςουρανοςouranonsky; heavenG3772nounN.ASMAccSingMascNaNNaNNaNNaNNaNouranos9842682οὐρανός, -οῦ, ὁsky, heaven
1110738Gen11NaNκαὶκαίκαιkaiand; evenG2532conjunctionCNaNNaNNaNNaNNaNNaNNaNNaNkai703062231καίand, even, also, namely
179409Gen11NaNτὴνοtēntheG3588pronoun, articleRA.ASFAccSingFemNaNNaNNaNNaNNaNo943488444ὁ, ἡ, τόthe
17330410Gen11NaNγῆνγῆγηgēnearth; landG1093nounN.ASFAccSingFemNaNNaNNaNNaNNaNge30823173γῆ, γῆς, ἡearth, soil, land
\n", "
" ], "text/plain": [ " orig_order book chapter verse subverse word lex_utf8 \\\n", "0 1 Gen 1 1 NaN ἐν ἐν \n", "14316 2 Gen 1 1 NaN ἀρχῇ ἀρχή \n", "14552 3 Gen 1 1 NaN ἐποίησεν ποιέω \n", "17938 4 Gen 1 1 NaN ὁ ὁ \n", "106382 5 Gen 1 1 NaN θεὸς θεός \n", "17939 6 Gen 1 1 NaN τὸν ὁ \n", "110391 7 Gen 1 1 NaN οὐρανὸν οὐρανός \n", "111073 8 Gen 1 1 NaN καὶ καί \n", "17940 9 Gen 1 1 NaN τὴν ὁ \n", "173304 10 Gen 1 1 NaN γῆν γῆ \n", "\n", " g_cons_utf8 translit_SBL lemma_gloss strong sp \\\n", "0 εν en in G1722 preposition \n", "14316 αρχη archē origin; beginning G746 noun \n", "14552 ποιεω epoiēsen do; make G4160 verb \n", "17938 ο ho the G3588 pronoun, article \n", "106382 θεος theos God G2316 noun \n", "17939 ο ton the G3588 pronoun, article \n", "110391 ουρανος ouranon sky; heaven G3772 noun \n", "111073 και kai and; even G2532 conjunction \n", "17940 ο tēn the G3588 pronoun, article \n", "173304 γη gēn earth; land G1093 noun \n", "\n", " morphology case nu gn degree tense voice mood ps \\\n", "0 P NaN NaN NaN NaN NaN NaN NaN NaN \n", "14316 N.DSF Dat Sing Fem NaN NaN NaN NaN NaN \n", "14552 V.AAI3S NaN Sing NaN NaN Aor Act Ind 3rd \n", "17938 RA.NSM Nom Sing Masc NaN NaN NaN NaN NaN \n", "106382 N.NSM Nom Sing Masc NaN NaN NaN NaN NaN \n", "17939 RA.ASM Acc Sing Masc NaN NaN NaN NaN NaN \n", "110391 N.ASM Acc Sing Masc NaN NaN NaN NaN NaN \n", "111073 C NaN NaN NaN NaN NaN NaN NaN NaN \n", "17940 RA.ASF Acc Sing Fem NaN NaN NaN NaN NaN \n", "173304 N.ASF Acc Sing Fem NaN NaN NaN NaN NaN \n", "\n", " lemma_translit abc_order freq_lemma BOL_lexeme_dict \\\n", "0 en 4638 14316 ἐν \n", "14316 arkhe 1835 236 ἀρχή, -ῆς, ἡ \n", "14552 poieo 10580 3386 ποιέω \n", "17938 o 9434 88444 ὁ, ἡ, τό \n", "106382 theos 6191 4009 θεός, -οῦ, ὁ \n", "17939 o 9434 88444 ὁ, ἡ, τό \n", "110391 ouranos 9842 682 οὐρανός, -οῦ, ὁ \n", "111073 kai 7030 62231 καί \n", "17940 o 9434 88444 ὁ, ἡ, τό \n", "173304 ge 3082 3173 γῆ, γῆς, ἡ \n", "\n", " BOL_gloss \n", "0 in, on, among \n", "14316 
ruler, beginning \n", "14552 do, make \n", "17938 the \n", "106382 God, god \n", "17939 the \n", "110391 sky, heaven \n", "111073 and, even, also, namely \n", "17940 the \n", "173304 earth, soil, land " ] }, "execution_count": 181, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureaddstage4 = featureaddstage4.drop_duplicates(['orig_order']).sort_values(by='orig_order', ascending=[True])\n", "featureaddstage4.head(10)" ] }, { "cell_type": "code", "execution_count": 182, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderchapterverseabc_orderfreq_lemma
count623693.000000623693.000000623693.000000623693.000000623693.000000
mean311847.00000019.20050116.9498787261.43062721743.617390
std180044.80505720.67968113.9941393447.81062032782.706194
min1.0000000.0000000.0000001.0000001.000000
25%155924.0000006.0000007.0000004421.000000244.000000
50%311847.00000013.00000014.0000007096.0000002522.000000
75%467770.00000025.00000023.0000009456.00000029396.000000
max623693.000000151.000000176.00000014174.00000088444.000000
\n", "
" ], "text/plain": [ " orig_order chapter verse abc_order \\\n", "count 623693.000000 623693.000000 623693.000000 623693.000000 \n", "mean 311847.000000 19.200501 16.949878 7261.430627 \n", "std 180044.805057 20.679681 13.994139 3447.810620 \n", "min 1.000000 0.000000 0.000000 1.000000 \n", "25% 155924.000000 6.000000 7.000000 4421.000000 \n", "50% 311847.000000 13.000000 14.000000 7096.000000 \n", "75% 467770.000000 25.000000 23.000000 9456.000000 \n", "max 623693.000000 151.000000 176.000000 14174.000000 \n", "\n", " freq_lemma \n", "count 623693.000000 \n", "mean 21743.617390 \n", "std 32782.706194 \n", "min 1.000000 \n", "25% 244.000000 \n", "50% 2522.000000 \n", "75% 29396.000000 \n", "max 88444.000000 " ] }, "execution_count": 182, "metadata": {}, "output_type": "execute_result" } ], "source": [ "featureaddstage4.describe()" ] }, { "cell_type": "code", "execution_count": 183, "metadata": {}, "outputs": [], "source": [ "featureaddstage4.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_CCATLXX/LXX_source_v1.4.xlsx')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Testing" ] }, { "cell_type": "code", "execution_count": 185, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# First, I have to load different modules that I use for analyzing the data and for plotting:\n", "import sys, os, collections\n", "import pandas as pd\n", "import numpy as np\n", "import re\n", "import csv\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt; plt.rcdefaults()\n", "from matplotlib.pyplot import figure\n", "from collections import Counter\n", "\n", "# Second, I have to load the Text Fabric app\n", "from tf.fabric import Fabric\n", "from tf.app import use" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This is Text-Fabric 9.1.11\n", "Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html\n", "\n", "29 features found and 0 ignored\n" ] }, { "data": { "text/html": [ "Text-Fabric: Text-Fabric API 9.1.11, no app configured
Data: D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_CCATLXX/CCATLXX/tf/1994_v2
Features:
\n", "
D:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_CCATLXX/CCATLXX/tf/1994_v2\n", "
\n", "\n", "
\n", "
\n", "abc_order\n", "
\n", "
str
\n", "
\n", " dictionary order\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:15:59Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "bol_gloss\n", "
\n", "
str
\n", "
\n", " BOL English gloss\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:15:57Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "bol_lexeme_dict\n", "
\n", "
str
\n", "
\n", " BOL dictionary form of lemma\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:15:58Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "book\n", "
\n", "
str
\n", "
\n", " book\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:00Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "case\n", "
\n", "
str
\n", "
\n", " case\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "chapter\n", "
\n", "
int
\n", "
\n", " chapter\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:02Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "degree\n", "
\n", "
str
\n", "
\n", " degree\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "freq_lemma\n", "
\n", "
str
\n", "
\n", " frequency of word in corpus\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "g_cons_utf8\n", "
\n", "
str
\n", "
\n", " word without accents\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:05Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "gloss\n", "
\n", "
str
\n", "
\n", " gloss\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "gn\n", "
\n", "
str
\n", "
\n", " gender\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "lex\n", "
\n", "
str
\n", "
\n", " lemma transliteration\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:09Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "lex_utf8\n", "
\n", "
str
\n", "
\n", " normalized word\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:10Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "mood\n", "
\n", "
str
\n", "
\n", " mood\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "morphology\n", "
\n", "
str
\n", "
\n", " morphology\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:12Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "nu\n", "
\n", "
str
\n", "
\n", " number\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "orig_order\n", "
\n", "
str
\n", "
\n", " original word order\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "
\n", " \n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:16Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "ps\n", "
\n", "
str
\n", "
\n", " person\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "sp\n", "
\n", "
str
\n", "
\n", " part of speech\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "strong\n", "
\n", "
str
\n", "
\n", " strongs number\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "subverse\n", "
\n", "
str
\n", "
\n", " subverse\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:20Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "tense\n", "
\n", "
str
\n", "
\n", " tense\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "translit_SBL\n", "
\n", "
str
\n", "
\n", " SBL transliteration\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:22Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "verse\n", "
\n", "
int
\n", "
\n", " verse\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:23Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "voice\n", "
\n", "
str
\n", "
\n", " voice\n", "
\n", "\n", "
\n", "
Author:
\n", "
?
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
?
\n", "
\n", "\n", "
\n", "
Version:
\n", "
?
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-23T04:29:04Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "word\n", "
\n", "
str
\n", "
\n", " text realized word\n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:26Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "
\n", " \n", "
\n", "\n", "
\n", "
Author:
\n", "
Rahlfs
\n", "
\n", "\n", "
\n", "
Converter:
\n", "
Adrian Negrea, Oliver Glanz
\n", "
\n", "\n", "
\n", "
Editors:
\n", "
CCAT, Eliran Wong
\n", "
\n", "\n", "
\n", "
Name:
\n", "
LXX
\n", "
\n", "\n", "
\n", "
Note:
\n", "
?
\n", "
\n", "\n", "
\n", "
Source::
\n", "
https://github.com/eliranwong/LXX-Rahlfs-1935
\n", "
\n", "\n", "
\n", "
Version:
\n", "
1935
\n", "
\n", "\n", "
\n", "
dateWritten:
\n", "
2021-12-24T02:16:27Z
\n", "
\n", "\n", "
\n", "
writtenBy:
\n", "
Text-Fabric
\n", "
\n", "\n", "
\n", "
\n", "
\n", "\n", "
\n", "
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Text-Fabric API: names N F E L T S C TF directly usable

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "rate limit is 60 requests per hour, with 57 left for this hour\n", "To increase the rate,see https://annotation.github.io/text-fabric/tf/advanced/repo.html/\n", "\tconnecting to online GitHub repo annotation/app-D ... failed\n", "GitHub says: 404 {\"message\": \"Not Found\", \"documentation_url\": \"https://docs.github.com/rest/reference/repos#get-a-repository\"}\n", "The requested TF-app is not available offline\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "No online connection\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "This is Text-Fabric 9.1.11\n", "Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html\n", "\n", "0 features found and 0 ignored\n", " 0.00s Not all of the warp features otype and oslots are present in\n", "None/D/\n", " 0.00s Only the Feature and Edge APIs will be enabled\n", " 0.00s Warp feature \"otext\" not found. Working without Text-API\n", "\n" ] }, { "data": { "text/html": [ "Text-Fabric: Text-Fabric API 9.1.11, no app configured
Data: None/D
Features:
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Text-Fabric API: names N F E L T S C TF directly usable

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#LXX = use('CCATLXX/tf/1994_v1', hoist=globals())\n", "# Load the corpus once, via the relative location. Do not follow this with\n", "# use('D:/OneDrive/...'): for that absolute Windows path Text-Fabric looked for an\n", "# app named 'D', found 0 features, and the result overwrote the working LXX handle.\n", "LXX = use('CCATLXX/tf/1994_v2', hoist=globals())" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.42s 10 results\n" ] }, { "data": { "text/html": [ "

verse 1

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Gen 1:1
verse
verse=1
subverse
subverse=
ἐν
BOL_gloss=in, on, amongBOL_lexeme_dict=ἐνabc_order=4638book=Gencase=chapter=1degree=freq_lemma=14316g_cons_utf8=ενgn=lemma_gloss=inlemma_translit=enlex_utf8=ἐνmood=morphology=Pnu=orig_order=1ps=sp=prepositionstrong=G1722subverse=tense=translit_SBL=enverse=1voice=word=ἐν
ἀρχῇ
BOL_gloss=ruler, beginningBOL_lexeme_dict=ἀρχή, -ῆς, ἡabc_order=1835book=Gencase=Datchapter=1degree=freq_lemma=236g_cons_utf8=αρχηgn=Femlemma_gloss=origin; beginninglemma_translit=arkhelex_utf8=ἀρχήmood=morphology=N.DSFnu=Singorig_order=2ps=sp=nounstrong=G746subverse=tense=translit_SBL=archēverse=1voice=word=ἀρχῇ
ἐποίησεν
BOL_gloss=do, makeBOL_lexeme_dict=ποιέωabc_order=10580book=Gencase=chapter=1degree=freq_lemma=3386g_cons_utf8=ποιεωgn=lemma_gloss=do; makelemma_translit=poieolex_utf8=ποιέωmood=Indmorphology=V.AAI3Snu=Singorig_order=3ps=3rdsp=verbstrong=G4160subverse=tense=Aortranslit_SBL=epoiēsenverse=1voice=Actword=ἐποίησεν
BOL_gloss=theBOL_lexeme_dict=ὁ, ἡ, τόabc_order=9434book=Gencase=Nomchapter=1degree=freq_lemma=88444g_cons_utf8=οgn=Masclemma_gloss=thelemma_translit=olex_utf8=mood=morphology=RA.NSMnu=Singorig_order=4ps=sp=pronoun, articlestrong=G3588subverse=tense=translit_SBL=hoverse=1voice=word=
θεὸς
BOL_gloss=God, godBOL_lexeme_dict=θεός, -οῦ, ὁabc_order=6191book=Gencase=Nomchapter=1degree=freq_lemma=4009g_cons_utf8=θεοςgn=Masclemma_gloss=Godlemma_translit=theoslex_utf8=θεόςmood=morphology=N.NSMnu=Singorig_order=5ps=sp=nounstrong=G2316subverse=tense=translit_SBL=theosverse=1voice=word=θεὸς
τὸν
BOL_gloss=theBOL_lexeme_dict=ὁ, ἡ, τόabc_order=9434book=Gencase=Accchapter=1degree=freq_lemma=88444g_cons_utf8=οgn=Masclemma_gloss=thelemma_translit=olex_utf8=mood=morphology=RA.ASMnu=Singorig_order=6ps=sp=pronoun, articlestrong=G3588subverse=tense=translit_SBL=tonverse=1voice=word=τὸν
οὐρανὸν
BOL_gloss=sky, heavenBOL_lexeme_dict=οὐρανός, -οῦ, ὁabc_order=9842book=Gencase=Accchapter=1degree=freq_lemma=682g_cons_utf8=ουρανοςgn=Masclemma_gloss=sky; heavenlemma_translit=ouranoslex_utf8=οὐρανόςmood=morphology=N.ASMnu=Singorig_order=7ps=sp=nounstrong=G3772subverse=tense=translit_SBL=ouranonverse=1voice=word=οὐρανὸν
καὶ
BOL_gloss=and, even, also, namelyBOL_lexeme_dict=καίabc_order=7030book=Gencase=chapter=1degree=freq_lemma=62231g_cons_utf8=καιgn=lemma_gloss=and; evenlemma_translit=kailex_utf8=καίmood=morphology=Cnu=orig_order=8ps=sp=conjunctionstrong=G2532subverse=tense=translit_SBL=kaiverse=1voice=word=καὶ
τὴν
BOL_gloss=theBOL_lexeme_dict=ὁ, ἡ, τόabc_order=9434book=Gencase=Accchapter=1degree=freq_lemma=88444g_cons_utf8=οgn=Femlemma_gloss=thelemma_translit=olex_utf8=mood=morphology=RA.ASFnu=Singorig_order=9ps=sp=pronoun, articlestrong=G3588subverse=tense=translit_SBL=tēnverse=1voice=word=τὴν
γῆν
BOL_gloss=earth, soil, landBOL_lexeme_dict=γῆ, γῆς, ἡabc_order=3082book=Gencase=Accchapter=1degree=freq_lemma=3173g_cons_utf8=γηgn=Femlemma_gloss=earth; landlemma_translit=gelex_utf8=γῆmood=morphology=N.ASFnu=Singorig_order=10ps=sp=nounstrong=G1093subverse=tense=translit_SBL=gēnverse=1voice=word=γῆν
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "Search0 = '''\n", "book book=Gen\n", " chapter chapter=1\n", " verse verse=1\n", " word\n", "'''\n", "Search0 = LXX.search(Search0)\n", "LXX.show(Search0, start=1, end=1, condensed=True, colorMap={1:'pink'}, extraFeatures={'orig_order','book','chapter','verse','subverse','word','lex_utf8','g_cons_utf8','translit_SBL','lemma_gloss','strong','sp','morphology','case','nu','gn','degree','tense','voice','mood','ps','lemma_translit','abc_order','freq_lemma','BOL_lexeme_dict','BOL_gloss'})" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.15s 0 results\n" ] } ], "source": [ "Search1 = '''\n", "verse book=Gen chapter=1 verse=1\n", " word\n", "'''\n", "Search1 = LXX.search(Search1)\n", "LXX.show(Search1, start=1, end=1, condensed=True, colorMap={1:'pink'}, extraFeatures={'subverse'})" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2.74s 10 results\n" ] }, { "data": { "text/html": [ "

verse 1

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Gen 1:1
verse
verse=1
subverse
subverse=
ἐν
g_cons_utf8=ενlex_utf8=ἐνmorphology=Ptranslit_SBL=enword=ἐν
ἀρχῇ
g_cons_utf8=αρχηlex_utf8=ἀρχήmorphology=N.DSFtranslit_SBL=archēword=ἀρχῇ
ἐποίησεν
g_cons_utf8=ποιεωlex_utf8=ποιέωmorphology=V.AAI3Stranslit_SBL=epoiēsenword=ἐποίησεν
g_cons_utf8=οlex_utf8=morphology=RA.NSMtranslit_SBL=howord=
θεὸς
g_cons_utf8=θεοςlex_utf8=θεόςmorphology=N.NSMtranslit_SBL=theosword=θεὸς
τὸν
g_cons_utf8=οlex_utf8=morphology=RA.ASMtranslit_SBL=tonword=τὸν
οὐρανὸν
g_cons_utf8=ουρανοςlex_utf8=οὐρανόςmorphology=N.ASMtranslit_SBL=ouranonword=οὐρανὸν
καὶ
g_cons_utf8=καιlex_utf8=καίmorphology=Ctranslit_SBL=kaiword=καὶ
τὴν
g_cons_utf8=οlex_utf8=morphology=RA.ASFtranslit_SBL=tēnword=τὴν
γῆν
g_cons_utf8=γηlex_utf8=γῆmorphology=N.ASFtranslit_SBL=gēnword=γῆν
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "Search2 = '''\n", "book book=Gen\n", " chapter chapter=1\n", " verse verse=1\n", " word word* lex_utf8 g_cons_utf8 morphology* translit_SBL\n", "'''\n", "Search2 = LXX.search(Search2)\n", "LXX.show(Search2, start=1, end=1, condensed=True, colorMap={1:'pink'}, extraFeatures={'subverse'})" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.03s 1 result\n" ] }, { "data": { "text/html": [ "

verse 1

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Num 21:3
verse
verse=3
subverse
καὶ
book=Numcase=chapter=21lemma_translit=kaiverse=3word=καὶ
εἰσήκουσεν
book=Numcase=chapter=21lemma_translit=eisakouoverse=3word=εἰσήκουσεν
κύριος
book=Numcase=Nomchapter=21lemma_translit=kuriosverse=3word=κύριος
τῆς
book=Numcase=Genchapter=21lemma_translit=overse=3word=τῆς
φωνῆς
book=Numcase=Genchapter=21lemma_translit=phoneverse=3word=φωνῆς
Ισραηλ
book=Numcase=Genchapter=21lemma_translit=Israelverse=3word=Ισραηλ
καὶ
book=Numcase=chapter=21lemma_translit=kaiverse=3word=καὶ
παρέδωκεν
book=Numcase=chapter=21lemma_translit=paradidomiverse=3word=παρέδωκεν
τὸν
book=Numcase=Accchapter=21lemma_translit=overse=3word=τὸν
Χανανιν
book=Numcase=Accchapter=21lemma_translit=Khananisverse=3word=Χανανιν
ὑποχείριον
book=Numcase=Accchapter=21lemma_translit=upokheiriosverse=3word=ὑποχείριον
αὐτοῦ
book=Numcase=Genchapter=21lemma_translit=autosverse=3word=αὐτοῦ
καὶ
book=Numcase=chapter=21lemma_translit=kaiverse=3word=καὶ
ἀνεθεμάτισεν
book=Numcase=chapter=21lemma_translit=anathematizoverse=3word=ἀνεθεμάτισεν
αὐτὸν
book=Numcase=Accchapter=21lemma_translit=autosverse=3word=αὐτὸν
καὶ
book=Numcase=chapter=21lemma_translit=kaiverse=3word=καὶ
τὰς
book=Numcase=Accchapter=21lemma_translit=overse=3word=τὰς
πόλεις
book=Numcase=Accchapter=21lemma_translit=polisverse=3word=πόλεις
αὐτοῦ
book=Numcase=Genchapter=21lemma_translit=autosverse=3word=αὐτοῦ
καὶ
book=Numcase=chapter=21lemma_translit=kaiverse=3word=καὶ
ἐπεκάλεσαν
book=Numcase=chapter=21lemma_translit=epikaleoverse=3word=ἐπεκάλεσαν
τὸ
book=Numcase=Accchapter=21lemma_translit=overse=3word=τὸ
ὄνομα
book=Numcase=Accchapter=21lemma_translit=onomaverse=3word=ὄνομα
τοῦ
book=Numcase=Genchapter=21lemma_translit=overse=3word=τοῦ
τόπου
book=Numcase=Genchapter=21lemma_translit=toposverse=3word=τόπου
ἐκείνου
book=Numcase=Genchapter=21lemma_translit=ekeinosverse=3word=ἐκείνου
ἀνάθεμα
book=Numcase=Accchapter=21lemma_translit=anathemaverse=3word=ἀνάθεμα
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "Search3 = '''\n", "book book=Num\n", " chapter chapter=21\n", " verse verse=3\n", "\n", "'''\n", "Search3 = LXX.search(Search3)\n", "LXX.show(Search3, start=1, end=1, condensed=True, colorMap={1:'pink'}, extraFeatures={'word', 'lemma_translit', 'case'})" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1.98s 12 results\n" ] }, { "data": { "text/html": [ "

verse 1

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Gen 21:17
verse
subverse
εἰσήκουσεν
BOL_lexeme_dict=εἰσακούωbook=Gencase=lemma_translit=eisakouolex_utf8=εἰσακούωword=εἰσήκουσεν
δὲ
BOL_lexeme_dict=δέbook=Gencase=lemma_translit=delex_utf8=δέword=δὲ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Nomlemma_translit=olex_utf8=word=
θεὸς
BOL_lexeme_dict=θεός, -οῦ, ὁbook=Gencase=Nomlemma_translit=theoslex_utf8=θεόςword=θεὸς
τῆς
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Genlemma_translit=olex_utf8=word=τῆς
φωνῆς
BOL_lexeme_dict=φωνή, -ῆς, ἡbook=Gencase=Genlemma_translit=phonelex_utf8=φωνήword=φωνῆς
τοῦ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Genlemma_translit=olex_utf8=word=τοῦ
παιδίου
BOL_lexeme_dict=παιδίον, -ου, τόbook=Gencase=Genlemma_translit=paidionlex_utf8=παιδίονword=παιδίου
ἐκ
BOL_lexeme_dict=ἐκ, ἐξbook=Gencase=lemma_translit=eklex_utf8=ἐκword=ἐκ
τοῦ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Genlemma_translit=olex_utf8=word=τοῦ
τόπου
BOL_lexeme_dict=τόπος, -ου, ὁbook=Gencase=Genlemma_translit=toposlex_utf8=τόποςword=τόπου
οὗ
BOL_lexeme_dict=book=Gencase=lemma_translit=oulex_utf8=οὗword=οὗ
ἦν
BOL_lexeme_dict=εἰμίbook=Gencase=lemma_translit=eimilex_utf8=εἰμίword=ἦν
καὶ
BOL_lexeme_dict=καίbook=Gencase=lemma_translit=kailex_utf8=καίword=καὶ
ἐκάλεσεν
BOL_lexeme_dict=καλέωbook=Gencase=lemma_translit=kaleolex_utf8=καλέωword=ἐκάλεσεν
ἄγγελος
BOL_lexeme_dict=ἄγγελος, -ου, ὁbook=Gencase=Nomlemma_translit=aggeloslex_utf8=ἄγγελοςword=ἄγγελος
τοῦ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Genlemma_translit=olex_utf8=word=τοῦ
θεοῦ
BOL_lexeme_dict=θεός, -οῦ, ὁbook=Gencase=Genlemma_translit=theoslex_utf8=θεόςword=θεοῦ
τὴν
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Acclemma_translit=olex_utf8=word=τὴν
Αγαρ
BOL_lexeme_dict=book=Gencase=Acclemma_translit=Agarlex_utf8=Ἄγαρword=Αγαρ
ἐκ
BOL_lexeme_dict=ἐκ, ἐξbook=Gencase=lemma_translit=eklex_utf8=ἐκword=ἐκ
τοῦ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Genlemma_translit=olex_utf8=word=τοῦ
οὐρανοῦ
BOL_lexeme_dict=οὐρανός, -οῦ, ὁbook=Gencase=Genlemma_translit=ouranoslex_utf8=οὐρανόςword=οὐρανοῦ
καὶ
BOL_lexeme_dict=καίbook=Gencase=lemma_translit=kailex_utf8=καίword=καὶ
εἶπεν
BOL_lexeme_dict=book=Gencase=lemma_translit=epolex_utf8=ἔπωword=εἶπεν
αὐτῇ
BOL_lexeme_dict=αὐτός, -ή, -όbook=Gencase=Datlemma_translit=autoslex_utf8=αὐτόςword=αὐτῇ
τί
BOL_lexeme_dict=τίς, τίbook=Gencase=Nomlemma_translit=tislex_utf8=τίςword=τί
ἐστιν
BOL_lexeme_dict=εἰμίbook=Gencase=lemma_translit=eimilex_utf8=εἰμίword=ἐστιν
Αγαρ
BOL_lexeme_dict=book=Gencase=Voclemma_translit=Agarlex_utf8=Ἄγαρword=Αγαρ
μὴ
BOL_lexeme_dict=μήbook=Gencase=lemma_translit=melex_utf8=μήword=μὴ
φοβοῦ
BOL_lexeme_dict=φοβέομαιbook=Gencase=lemma_translit=phobeolex_utf8=φοβέωword=φοβοῦ
ἐπακήκοεν
BOL_lexeme_dict=ἐπακούωbook=Gencase=lemma_translit=epakouolex_utf8=ἐπακούωword=ἐπακήκοεν
γὰρ
BOL_lexeme_dict=γάρbook=Gencase=lemma_translit=garlex_utf8=γάρword=γὰρ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Nomlemma_translit=olex_utf8=word=
θεὸς
BOL_lexeme_dict=θεός, -οῦ, ὁbook=Gencase=Nomlemma_translit=theoslex_utf8=θεόςword=θεὸς
τῆς
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Genlemma_translit=olex_utf8=word=τῆς
φωνῆς
BOL_lexeme_dict=φωνή, -ῆς, ἡbook=Gencase=Genlemma_translit=phonelex_utf8=φωνήword=φωνῆς
τοῦ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Genlemma_translit=olex_utf8=word=τοῦ
παιδίου
BOL_lexeme_dict=παιδίον, -ου, τόbook=Gencase=Genlemma_translit=paidionlex_utf8=παιδίονword=παιδίου
σου
BOL_lexeme_dict=book=Gencase=Genlemma_translit=soulex_utf8=σοῦword=σου
ἐκ
BOL_lexeme_dict=ἐκ, ἐξbook=Gencase=lemma_translit=eklex_utf8=ἐκword=ἐκ
τοῦ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Genlemma_translit=olex_utf8=word=τοῦ
τόπου
BOL_lexeme_dict=τόπος, -ου, ὁbook=Gencase=Genlemma_translit=toposlex_utf8=τόποςword=τόπου
οὗ
BOL_lexeme_dict=book=Gencase=lemma_translit=oulex_utf8=οὗword=οὗ
ἐστιν
BOL_lexeme_dict=εἰμίbook=Gencase=lemma_translit=eimilex_utf8=εἰμίword=ἐστιν
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

verse 2

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Gen 30:6
verse
subverse
καὶ
BOL_lexeme_dict=καίbook=Gencase=lemma_translit=kailex_utf8=καίword=καὶ
εἶπεν
BOL_lexeme_dict=book=Gencase=lemma_translit=epolex_utf8=ἔπωword=εἶπεν
Ραχηλ
BOL_lexeme_dict=Ῥαχήλ, ἡbook=Gencase=Nomlemma_translit=Rakhellex_utf8=Ῥαχήλword=Ραχηλ
ἔκρινέν
BOL_lexeme_dict=κρίνωbook=Gencase=lemma_translit=krinolex_utf8=κρίνωword=ἔκρινέν
μοι
BOL_lexeme_dict=book=Gencase=Datlemma_translit=moilex_utf8=μοιword=μοι
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Nomlemma_translit=olex_utf8=word=
θεὸς
BOL_lexeme_dict=θεός, -οῦ, ὁbook=Gencase=Nomlemma_translit=theoslex_utf8=θεόςword=θεὸς
καὶ
BOL_lexeme_dict=καίbook=Gencase=lemma_translit=kailex_utf8=καίword=καὶ
ἐπήκουσεν
BOL_lexeme_dict=ἐπακούωbook=Gencase=lemma_translit=epakouolex_utf8=ἐπακούωword=ἐπήκουσεν
τῆς
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Genlemma_translit=olex_utf8=word=τῆς
φωνῆς
BOL_lexeme_dict=φωνή, -ῆς, ἡbook=Gencase=Genlemma_translit=phonelex_utf8=φωνήword=φωνῆς
μου
BOL_lexeme_dict=book=Gencase=Genlemma_translit=moulex_utf8=μουword=μου
καὶ
BOL_lexeme_dict=καίbook=Gencase=lemma_translit=kailex_utf8=καίword=καὶ
ἔδωκέν
BOL_lexeme_dict=δίδωμιbook=Gencase=lemma_translit=didomilex_utf8=δίδωμιword=ἔδωκέν
μοι
BOL_lexeme_dict=book=Gencase=Datlemma_translit=moilex_utf8=μοιword=μοι
υἱόν
BOL_lexeme_dict=υἱός, -οῦ, ὁbook=Gencase=Acclemma_translit=uioslex_utf8=υἱόςword=υἱόν
διὰ
BOL_lexeme_dict=διάbook=Gencase=lemma_translit=dialex_utf8=διάword=διὰ
τοῦτο
BOL_lexeme_dict=οὗτος, αὕτη, τοῦτοbook=Gencase=Acclemma_translit=outoslex_utf8=οὗτοςword=τοῦτο
ἐκάλεσεν
BOL_lexeme_dict=καλέωbook=Gencase=lemma_translit=kaleolex_utf8=καλέωword=ἐκάλεσεν
τὸ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Gencase=Acclemma_translit=olex_utf8=word=τὸ
ὄνομα
BOL_lexeme_dict=ὄνομα, -τος, τόbook=Gencase=Acclemma_translit=onomalex_utf8=ὄνομαword=ὄνομα
αὐτοῦ
BOL_lexeme_dict=αὐτός, -ή, -όbook=Gencase=Genlemma_translit=autoslex_utf8=αὐτόςword=αὐτοῦ
Δαν
BOL_lexeme_dict=book=Gencase=Genlemma_translit=Danlex_utf8=Δανword=Δαν
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

verse 3

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Josh 22:2
verse
subverse
καὶ
BOL_lexeme_dict=καίbook=Joshcase=lemma_translit=kailex_utf8=καίword=καὶ
εἶπεν
BOL_lexeme_dict=book=Joshcase=lemma_translit=epolex_utf8=ἔπωword=εἶπεν
αὐτοῖς
BOL_lexeme_dict=αὐτός, -ή, -όbook=Joshcase=Datlemma_translit=autoslex_utf8=αὐτόςword=αὐτοῖς
ὑμεῖς
BOL_lexeme_dict=book=Joshcase=Nomlemma_translit=umeislex_utf8=ὑμεῖςword=ὑμεῖς
ἀκηκόατε
BOL_lexeme_dict=ἀκούωbook=Joshcase=lemma_translit=akouolex_utf8=ἀκούωword=ἀκηκόατε
πάντα
BOL_lexeme_dict=πᾶς, πᾶσα, πᾶνbook=Joshcase=Acclemma_translit=paslex_utf8=πᾶςword=πάντα
ὅσα
BOL_lexeme_dict=ὅσος, -η, -ονbook=Joshcase=Acclemma_translit=ososlex_utf8=ὅσοςword=ὅσα
ἐνετείλατο
BOL_lexeme_dict=ἐντέλλομαιbook=Joshcase=lemma_translit=entellomailex_utf8=ἐντέλλομαιword=ἐνετείλατο
ὑμῖν
BOL_lexeme_dict=book=Joshcase=Datlemma_translit=uminlex_utf8=ὑμῖνword=ὑμῖν
Μωυσῆς
BOL_lexeme_dict=book=Joshcase=Nomlemma_translit=Moseuslex_utf8=Μωσεύςword=Μωυσῆς
BOL_lexeme_dict=ὁ, ἡ, τόbook=Joshcase=Nomlemma_translit=olex_utf8=word=
παῖς
BOL_lexeme_dict=παῖς, -παιδός, ὁ, ἡbook=Joshcase=Nomlemma_translit=paislex_utf8=παῖςword=παῖς
κυρίου
BOL_lexeme_dict=κύριος, -ου, ὁbook=Joshcase=Genlemma_translit=kurioslex_utf8=κύριοςword=κυρίου
καὶ
BOL_lexeme_dict=καίbook=Joshcase=lemma_translit=kailex_utf8=καίword=καὶ
ἐπηκούσατε
BOL_lexeme_dict=ἐπακούωbook=Joshcase=lemma_translit=epakouolex_utf8=ἐπακούωword=ἐπηκούσατε
τῆς
BOL_lexeme_dict=ὁ, ἡ, τόbook=Joshcase=Genlemma_translit=olex_utf8=word=τῆς
φωνῆς
BOL_lexeme_dict=φωνή, -ῆς, ἡbook=Joshcase=Genlemma_translit=phonelex_utf8=φωνήword=φωνῆς
μου
BOL_lexeme_dict=book=Joshcase=Genlemma_translit=moulex_utf8=μουword=μου
κατὰ
BOL_lexeme_dict=κατάbook=Joshcase=lemma_translit=katalex_utf8=κατάword=κατὰ
πάντα
BOL_lexeme_dict=πᾶς, πᾶσα, πᾶνbook=Joshcase=Acclemma_translit=paslex_utf8=πᾶςword=πάντα
ὅσα
BOL_lexeme_dict=ὅσος, -η, -ονbook=Joshcase=Acclemma_translit=ososlex_utf8=ὅσοςword=ὅσα
ἐνετειλάμην
BOL_lexeme_dict=ἐντέλλομαιbook=Joshcase=lemma_translit=entellomailex_utf8=ἐντέλλομαιword=ἐνετειλάμην
ὑμῖν
BOL_lexeme_dict=book=Joshcase=Datlemma_translit=uminlex_utf8=ὑμῖνword=ὑμῖν
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

verse 4

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Judg 13:9
verse
subverse
καὶ
BOL_lexeme_dict=καίbook=Judgcase=lemma_translit=kailex_utf8=καίword=καὶ
ἐπήκουσεν
BOL_lexeme_dict=ἐπακούωbook=Judgcase=lemma_translit=epakouolex_utf8=ἐπακούωword=ἐπήκουσεν
BOL_lexeme_dict=ὁ, ἡ, τόbook=Judgcase=Nomlemma_translit=olex_utf8=word=
θεὸς
BOL_lexeme_dict=θεός, -οῦ, ὁbook=Judgcase=Nomlemma_translit=theoslex_utf8=θεόςword=θεὸς
τῆς
BOL_lexeme_dict=ὁ, ἡ, τόbook=Judgcase=Genlemma_translit=olex_utf8=word=τῆς
φωνῆς
BOL_lexeme_dict=φωνή, -ῆς, ἡbook=Judgcase=Genlemma_translit=phonelex_utf8=φωνήword=φωνῆς
Μανωε
BOL_lexeme_dict=book=Judgcase=Genlemma_translit=Manoelex_utf8=Μανωεword=Μανωε
καὶ
BOL_lexeme_dict=καίbook=Judgcase=lemma_translit=kailex_utf8=καίword=καὶ
παρεγένετο
BOL_lexeme_dict=παραγίνομαιbook=Judgcase=lemma_translit=paraginomailex_utf8=παραγίνομαιword=παρεγένετο
BOL_lexeme_dict=ὁ, ἡ, τόbook=Judgcase=Nomlemma_translit=olex_utf8=word=
ἄγγελος
BOL_lexeme_dict=ἄγγελος, -ου, ὁbook=Judgcase=Nomlemma_translit=aggeloslex_utf8=ἄγγελοςword=ἄγγελος
τοῦ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Judgcase=Genlemma_translit=olex_utf8=word=τοῦ
θεοῦ
BOL_lexeme_dict=θεός, -οῦ, ὁbook=Judgcase=Genlemma_translit=theoslex_utf8=θεόςword=θεοῦ
ἔτι
BOL_lexeme_dict=ἔτιbook=Judgcase=lemma_translit=etilex_utf8=ἔτιword=ἔτι
πρὸς
BOL_lexeme_dict=πρόςbook=Judgcase=lemma_translit=proslex_utf8=πρόςword=πρὸς
τὴν
BOL_lexeme_dict=ὁ, ἡ, τόbook=Judgcase=Acclemma_translit=olex_utf8=word=τὴν
γυναῖκα
BOL_lexeme_dict=γυνή, -αικός, ἡbook=Judgcase=Acclemma_translit=gunelex_utf8=γυνήword=γυναῖκα
αὐτῆς
BOL_lexeme_dict=αὐτός, -ή, -όbook=Judgcase=Genlemma_translit=autoslex_utf8=αὐτόςword=αὐτῆς
καθημένης
BOL_lexeme_dict=κάθημαιbook=Judgcase=lemma_translit=kathemailex_utf8=κάθημαιword=καθημένης
ἐν
BOL_lexeme_dict=ἐνbook=Judgcase=lemma_translit=enlex_utf8=ἐνword=ἐν
τῷ
BOL_lexeme_dict=ὁ, ἡ, τόbook=Judgcase=Datlemma_translit=olex_utf8=word=τῷ
ἀγρῷ
BOL_lexeme_dict=ἀγρός, -οῦ, ὁbook=Judgcase=Datlemma_translit=agroslex_utf8=ἀγρόςword=ἀγρῷ
καὶ
BOL_lexeme_dict=καίbook=Judgcase=lemma_translit=kailex_utf8=καίword=καὶ
Μανωε
BOL_lexeme_dict=book=Judgcase=Nomlemma_translit=Manoelex_utf8=Μανωεword=Μανωε
BOL_lexeme_dict=ὁ, ἡ, τόbook=Judgcase=Nomlemma_translit=olex_utf8=word=
ἀνὴρ
BOL_lexeme_dict=ἀνήρ, ἀνδρός, ὁbook=Judgcase=Nomlemma_translit=anerlex_utf8=ἀνήρword=ἀνὴρ
αὐτῆς
BOL_lexeme_dict=αὐτός, -ή, -όbook=Judgcase=Genlemma_translit=autoslex_utf8=αὐτόςword=αὐτῆς
οὐκ
BOL_lexeme_dict=οὐbook=Judgcase=lemma_translit=oulex_utf8=οὐword=οὐκ
ἦν
BOL_lexeme_dict=εἰμίbook=Judgcase=lemma_translit=eimilex_utf8=εἰμίword=ἦν
μετ᾿
BOL_lexeme_dict=μετάbook=Judgcase=lemma_translit=metalex_utf8=μετάword=μετ᾿
αὐτῆς
BOL_lexeme_dict=αὐτός, -ή, -όbook=Judgcase=Genlemma_translit=autoslex_utf8=αὐτόςword=αὐτῆς
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

verse 5

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
2Sam 22:7
verse
subverse
ἐν
BOL_lexeme_dict=ἐνbook=2Samcase=lemma_translit=enlex_utf8=ἐνword=ἐν
τῷ
BOL_lexeme_dict=ὁ, ἡ, τόbook=2Samcase=Datlemma_translit=olex_utf8=word=τῷ
θλίβεσθαί
BOL_lexeme_dict=θλίβωbook=2Samcase=lemma_translit=thlibolex_utf8=θλίβωword=θλίβεσθαί
με
BOL_lexeme_dict=book=2Samcase=Acclemma_translit=melex_utf8=μέword=με
ἐπικαλέσομαι
BOL_lexeme_dict=ἐπικαλέωbook=2Samcase=lemma_translit=epikaleolex_utf8=ἐπικαλέωword=ἐπικαλέσομαι
κύριον
BOL_lexeme_dict=κύριος, -ου, ὁbook=2Samcase=Acclemma_translit=kurioslex_utf8=κύριοςword=κύριον
καὶ
BOL_lexeme_dict=καίbook=2Samcase=lemma_translit=kailex_utf8=καίword=καὶ
πρὸς
BOL_lexeme_dict=πρόςbook=2Samcase=lemma_translit=proslex_utf8=πρόςword=πρὸς
τὸν
BOL_lexeme_dict=ὁ, ἡ, τόbook=2Samcase=Acclemma_translit=olex_utf8=word=τὸν
θεόν
BOL_lexeme_dict=θεός, -οῦ, ὁbook=2Samcase=Acclemma_translit=theoslex_utf8=θεόςword=θεόν
μου
BOL_lexeme_dict=book=2Samcase=Genlemma_translit=moulex_utf8=μουword=μου
βοήσομαι
BOL_lexeme_dict=βοάωbook=2Samcase=lemma_translit=boaolex_utf8=βοάωword=βοήσομαι
καὶ
BOL_lexeme_dict=καίbook=2Samcase=lemma_translit=kailex_utf8=καίword=καὶ
ἐπακούσεται
BOL_lexeme_dict=ἐπακούωbook=2Samcase=lemma_translit=epakouolex_utf8=ἐπακούωword=ἐπακούσεται
ἐκ
BOL_lexeme_dict=ἐκ, ἐξbook=2Samcase=lemma_translit=eklex_utf8=ἐκword=ἐκ
ναοῦ
BOL_lexeme_dict=ναός, -οῦ, ὁbook=2Samcase=Genlemma_translit=naoslex_utf8=ναόςword=ναοῦ
αὐτοῦ
BOL_lexeme_dict=αὐτός, -ή, -όbook=2Samcase=Genlemma_translit=autoslex_utf8=αὐτόςword=αὐτοῦ
φωνῆς
BOL_lexeme_dict=φωνή, -ῆς, ἡbook=2Samcase=Genlemma_translit=phonelex_utf8=φωνήword=φωνῆς
μου
BOL_lexeme_dict=book=2Samcase=Genlemma_translit=moulex_utf8=μουword=μου
καὶ
BOL_lexeme_dict=καίbook=2Samcase=lemma_translit=kailex_utf8=καίword=καὶ
BOL_lexeme_dict=ὁ, ἡ, τόbook=2Samcase=Nomlemma_translit=olex_utf8=word=
κραυγή
BOL_lexeme_dict=κραυγή, -ῆς, ἡbook=2Samcase=Nomlemma_translit=kraugelex_utf8=κραυγήword=κραυγή
μου
BOL_lexeme_dict=book=2Samcase=Genlemma_translit=moulex_utf8=μουword=μου
ἐν
BOL_lexeme_dict=ἐνbook=2Samcase=lemma_translit=enlex_utf8=ἐνword=ἐν
τοῖς
BOL_lexeme_dict=ὁ, ἡ, τόbook=2Samcase=Datlemma_translit=olex_utf8=word=τοῖς
ὠσὶν
BOL_lexeme_dict=οὖς, ὠτός, τόbook=2Samcase=Datlemma_translit=ouslex_utf8=οὖςword=ὠσὶν
αὐτοῦ
BOL_lexeme_dict=αὐτός, -ή, -όbook=2Samcase=Genlemma_translit=autoslex_utf8=αὐτόςword=αὐτοῦ
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "Eisakouw = '''\n", "book book#Esth\n", " verse\n", " word lemma_translit=epakouo\n", " word lemma_translit=theos|kurios\n", " word lemma_translit=phone case=Gen\n", "\n", "'''\n", "Eisakouw = LXX.search(Eisakouw)\n", "LXX.show(Eisakouw, start=1, end=100, condensed=True, colorMap={1:'pink'}, extraFeatures={'word', 'lemma_translit', 'case', 'lex_utf8', 'BOL_lexeme_dict'})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# New Feature Development" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Morphology\n", "## MorphGNT:sblgnt Data Description\n", "Here: https://github.com/morphgnt/sblgnt\n" ] }, { "cell_type": "code", "execution_count": 406, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0lemma
0βίβλοςβίβλος
1γένεσιςγένεσις
2ἸησοῦςἸησοῦς
3ΧριστόςΧριστός
4υἱόςυἱός
\n", "
" ], "text/plain": [ " 0 lemma\n", "0 βίβλος βίβλος\n", "1 γένεσις γένεσις\n", "2 Ἰησοῦς Ἰησοῦς\n", "3 Χριστός Χριστός\n", "4 υἱός υἱός" ] }, "execution_count": 406, "metadata": {}, "output_type": "execute_result" } ], "source": [ "translitadd['lemma']=translitadd[0]\n", "translitadd.head(5)" ] }, { "cell_type": "code", "execution_count": 407, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
lemma
0βίβλος
1γένεσις
2Ἰησοῦς
3Χριστός
4υἱός
\n", "
" ], "text/plain": [ " lemma\n", "0 βίβλος\n", "1 γένεσις\n", "2 Ἰησοῦς\n", "3 Χριστός\n", "4 υἱός" ] }, "execution_count": 407, "metadata": {}, "output_type": "execute_result" } ], "source": [ "translitadd=translitadd[['lemma']]\n", "translitadd.head(5)" ] }, { "cell_type": "code", "execution_count": 408, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
lemmaorig_order
0βίβλος1
1γένεσις2
2Ἰησοῦς3
3Χριστός4
4υἱός5
\n", "
" ], "text/plain": [ " lemma orig_order\n", "0 βίβλος 1\n", "1 γένεσις 2\n", "2 Ἰησοῦς 3\n", "3 Χριστός 4\n", "4 υἱός 5" ] }, "execution_count": 408, "metadata": {}, "output_type": "execute_result" } ], "source": [ "translitadd['orig_order'] = translitadd.index +1\n", "translitadd.head(5)" ] }, { "cell_type": "code", "execution_count": 409, "metadata": {}, "outputs": [], "source": [ "from unidecode import unidecode" ] }, { "cell_type": "code", "execution_count": 410, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "biblos\n" ] } ], "source": [ "s = \"βίβλος\"\n", "s = unidecode(s)\n", "print(s)" ] }, { "cell_type": "code", "execution_count": 411, "metadata": {}, "outputs": [], "source": [ "translitadd['translit'] = translitadd['lemma'].apply(unidecode)" ] }, { "cell_type": "code", "execution_count": 412, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
lemmaorig_ordertranslit
0βίβλος1biblos
1γένεσις2genesis
2Ἰησοῦς3Iesous
3Χριστός4Khristos
4υἱός5uios
\n", "
" ], "text/plain": [ " lemma orig_order translit\n", "0 βίβλος 1 biblos\n", "1 γένεσις 2 genesis\n", "2 Ἰησοῦς 3 Iesous\n", "3 Χριστός 4 Khristos\n", "4 υἱός 5 uios" ] }, "execution_count": 412, "metadata": {}, "output_type": "execute_result" } ], "source": [ "translitadd.head(5)" ] }, { "cell_type": "code", "execution_count": 413, "metadata": {}, "outputs": [], "source": [ "translitadd['translit'].to_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_translit.tf', index=None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# ABC dictionary order\n" ] }, { "cell_type": "code", "execution_count": 414, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
0βίβλος
1γένεσις
2Ἰησοῦς
3Χριστός
4υἱός
5Δαυίδ
6υἱός
7Ἀβραάμ
8Ἀβραάμ
9γεννάω
\n", "
" ], "text/plain": [ " 0\n", "0 βίβλος\n", "1 γένεσις\n", "2 Ἰησοῦς\n", "3 Χριστός\n", "4 υἱός\n", "5 Δαυίδ\n", "6 υἱός\n", "7 Ἀβραάμ\n", "8 Ἀβραάμ\n", "9 γεννάω" ] }, "execution_count": 414, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABC1=pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf',header=None, delimiter='\\t',encoding='utf-8')\n", "pd.set_option('display.max_columns', 50)\n", "ABC1.head(10)" ] }, { "cell_type": "code", "execution_count": 415, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0lemma
0βίβλοςβίβλος
1γένεσιςγένεσις
2ἸησοῦςἸησοῦς
3ΧριστόςΧριστός
4υἱόςυἱός
\n", "
" ], "text/plain": [ " 0 lemma\n", "0 βίβλος βίβλος\n", "1 γένεσις γένεσις\n", "2 Ἰησοῦς Ἰησοῦς\n", "3 Χριστός Χριστός\n", "4 υἱός υἱός" ] }, "execution_count": 415, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABC1['lemma']=lemma[0]\n", "ABC1.head(5)" ] }, { "cell_type": "code", "execution_count": 416, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0lemmaorig_order
0βίβλοςβίβλος1
1γένεσιςγένεσις2
2ἸησοῦςἸησοῦς3
3ΧριστόςΧριστός4
4υἱόςυἱός5
\n", "
" ], "text/plain": [ " 0 lemma orig_order\n", "0 βίβλος βίβλος 1\n", "1 γένεσις γένεσις 2\n", "2 Ἰησοῦς Ἰησοῦς 3\n", "3 Χριστός Χριστός 4\n", "4 υἱός υἱός 5" ] }, "execution_count": 416, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABC1['orig_order'] = ABC1.index +1\n", "ABC1.head(5)" ] }, { "cell_type": "code", "execution_count": 417, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderlemma
01βίβλος
12γένεσις
23Ἰησοῦς
34Χριστός
45υἱός
\n", "
" ], "text/plain": [ " orig_order lemma\n", "0 1 βίβλος\n", "1 2 γένεσις\n", "2 3 Ἰησοῦς\n", "3 4 Χριστός\n", "4 5 υἱός" ] }, "execution_count": 417, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABC1=ABC1[['orig_order','lemma']]\n", "ABC1.head(5)" ] }, { "cell_type": "code", "execution_count": 418, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_order
count137554.000000
mean68777.500000
std39708.563801
min1.000000
25%34389.250000
50%68777.500000
75%103165.750000
max137554.000000
\n", "
" ], "text/plain": [ " orig_order\n", "count 137554.000000\n", "mean 68777.500000\n", "std 39708.563801\n", "min 1.000000\n", "25% 34389.250000\n", "50% 68777.500000\n", "75% 103165.750000\n", "max 137554.000000" ] }, "execution_count": 418, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABC1.describe()" ] }, { "cell_type": "code", "execution_count": 419, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderlemma
6847968480Αἰγύπτιος
6963369634Αἰθίοψ
7046470465Αἰνέας
5073950740Αἰνών
679680Αἴγυπτος
3081130812Αὐγοῦστος
8758987590Βάαλ
7552075521Βέροια
171172Βαβυλών
128506128507Βαλάκ
\n", "
" ], "text/plain": [ " orig_order lemma\n", "68479 68480 Αἰγύπτιος\n", "69633 69634 Αἰθίοψ\n", "70464 70465 Αἰνέας\n", "50739 50740 Αἰνών\n", "679 680 Αἴγυπτος\n", "30811 30812 Αὐγοῦστος\n", "87589 87590 Βάαλ\n", "75520 75521 Βέροια\n", "171 172 Βαβυλών\n", "128506 128507 Βαλάκ" ] }, "execution_count": 419, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABCdict = ABC1.drop_duplicates(['lemma']).sort_values(by='lemma', ascending=[True])\n", "ABCdict.head(10)" ] }, { "cell_type": "code", "execution_count": 420, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_order
count5461.000000
mean55051.110969
std42441.209940
min1.000000
25%12643.000000
50%48785.000000
75%90141.000000
max137334.000000
\n", "
" ], "text/plain": [ " orig_order\n", "count 5461.000000\n", "mean 55051.110969\n", "std 42441.209940\n", "min 1.000000\n", "25% 12643.000000\n", "50% 48785.000000\n", "75% 90141.000000\n", "max 137334.000000" ] }, "execution_count": 420, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABCdict.describe()" ] }, { "cell_type": "code", "execution_count": 421, "metadata": {}, "outputs": [], "source": [ "ABC1.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/ABC1order.xlsx', encoding='utf-8')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now I am ordering the word alphabetically iwth libreoffice writer since I cannot do that in pandas (yet?).\n" ] }, { "cell_type": "code", "execution_count": 422, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0orig_orderlemmaABC order
02967829679Ἀαρών1
1131340131341Ἀβαδδών2
2100253100254ἀβαρής3
32800128002αββα4
41409414095Ἅβελ5
5108109Ἀβιά6
61952319524Ἀβιαθάρ7
73168231683Ἀβιληνή8
8190191Ἀβιούδ9
978Ἀβραάμ10
\n", "
" ], "text/plain": [ " Unnamed: 0 orig_order lemma ABC order\n", "0 29678 29679 Ἀαρών 1\n", "1 131340 131341 Ἀβαδδών 2\n", "2 100253 100254 ἀβαρής 3\n", "3 28001 28002 αββα 4\n", "4 14094 14095 Ἅβελ 5\n", "5 108 109 Ἀβιά 6\n", "6 19523 19524 Ἀβιαθάρ 7\n", "7 31682 31683 Ἀβιληνή 8\n", "8 190 191 Ἀβιούδ 9\n", "9 7 8 Ἀβραάμ 10" ] }, "execution_count": 422, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ABC2=pd.read_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/ABC2order.xlsx')\n", "pd.set_option('display.max_columns', 50)\n", "ABC2.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we merge the ABCorder dataframe with the original lemma DF." ] }, { "cell_type": "code", "execution_count": 423, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_order_xlemmaUnnamed: 0orig_order_yABC order
01βίβλος01970
126440βίβλος01970
231717βίβλος01970
345660βίβλος01970
464886βίβλος01970
\n", "
" ], "text/plain": [ " orig_order_x lemma Unnamed: 0 orig_order_y ABC order\n", "0 1 βίβλος 0 1 970\n", "1 26440 βίβλος 0 1 970\n", "2 31717 βίβλος 0 1 970\n", "3 45660 βίβλος 0 1 970\n", "4 64886 βίβλος 0 1 970" ] }, "execution_count": 423, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lemma_ABC=pd.merge (ABC1, ABC2,\n", " on='lemma',\n", " how='outer')\n", "lemma_ABC.head(5)" ] }, { "cell_type": "code", "execution_count": 424, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_order_xUnnamed: 0orig_order_yABC order
count137554.000000137554.000000137554.000000137554.00000
mean68777.5000007050.5315667051.5315662676.19798
std39708.56380120152.24899820152.2489981339.74175
min1.0000000.0000001.0000001.00000
25%34389.25000025.00000026.0000001501.00000
50%68777.500000400.000000401.0000002727.00000
75%103165.7500002097.2500002098.2500003598.00000
max137554.000000137333.000000137334.0000005461.00000
\n", "
" ], "text/plain": [ " orig_order_x Unnamed: 0 orig_order_y ABC order\n", "count 137554.000000 137554.000000 137554.000000 137554.00000\n", "mean 68777.500000 7050.531566 7051.531566 2676.19798\n", "std 39708.563801 20152.248998 20152.248998 1339.74175\n", "min 1.000000 0.000000 1.000000 1.00000\n", "25% 34389.250000 25.000000 26.000000 1501.00000\n", "50% 68777.500000 400.000000 401.000000 2727.00000\n", "75% 103165.750000 2097.250000 2098.250000 3598.00000\n", "max 137554.000000 137333.000000 137334.000000 5461.00000" ] }, "execution_count": 424, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lemma_ABC.describe()" ] }, { "cell_type": "code", "execution_count": 425, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_order_xlemmaUnnamed: 0orig_order_yABC order
01βίβλος01970
102γένεσις121074
153Ἰησοῦς232406
9214Χριστός345385
14495υἱός455053
18246Δαυίδ561156
14507υἱός455053
18838Ἀβραάμ7810
18849Ἀβραάμ7810
195610γεννάω9101077
\n", "
" ], "text/plain": [ " orig_order_x lemma Unnamed: 0 orig_order_y ABC order\n", "0 1 βίβλος 0 1 970\n", "10 2 γένεσις 1 2 1074\n", "15 3 Ἰησοῦς 2 3 2406\n", "921 4 Χριστός 3 4 5385\n", "1449 5 υἱός 4 5 5053\n", "1824 6 Δαυίδ 5 6 1156\n", "1450 7 υἱός 4 5 5053\n", "1883 8 Ἀβραάμ 7 8 10\n", "1884 9 Ἀβραάμ 7 8 10\n", "1956 10 γεννάω 9 10 1077" ] }, "execution_count": 425, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lemma_ABC.sort_values(['orig_order_x'], ascending=True).head(10)" ] }, { "cell_type": "code", "execution_count": 426, "metadata": {}, "outputs": [], "source": [ "lemma_ABC.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_abc.xlsx')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Word Frequency" ] }, { "cell_type": "code", "execution_count": 427, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
0βίβλος
1γένεσις
2Ἰησοῦς
3Χριστός
4υἱός
5Δαυίδ
6υἱός
7Ἀβραάμ
8Ἀβραάμ
9γεννάω
10
11Ἰσαάκ
12Ἰσαάκ
13δέ
14γεννάω
15
16Ἰακώβ
17Ἰακώβ
18δέ
19γεννάω
\n", "
" ], "text/plain": [ " 0\n", "0 βίβλος\n", "1 γένεσις\n", "2 Ἰησοῦς\n", "3 Χριστός\n", "4 υἱός\n", "5 Δαυίδ\n", "6 υἱός\n", "7 Ἀβραάμ\n", "8 Ἀβραάμ\n", "9 γεννάω\n", "10 ὁ\n", "11 Ἰσαάκ\n", "12 Ἰσαάκ\n", "13 δέ\n", "14 γεννάω\n", "15 ὁ\n", "16 Ἰακώβ\n", "17 Ἰακώβ\n", "18 δέ\n", "19 γεννάω" ] }, "execution_count": 427, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frequencyadd=pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf',header=None, delimiter='\\t',encoding='utf-8')\n", "pd.set_option('display.max_columns', 50)\n", "frequencyadd.head(20)" ] }, { "cell_type": "code", "execution_count": 428, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderlemma
01βίβλος
12γένεσις
23Ἰησοῦς
34Χριστός
45υἱός
\n", "
" ], "text/plain": [ " orig_order lemma\n", "0 1 βίβλος\n", "1 2 γένεσις\n", "2 3 Ἰησοῦς\n", "3 4 Χριστός\n", "4 5 υἱός" ] }, "execution_count": 428, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frequencyadd['orig_order'] = frequencyadd.index +1\n", "frequencyadd['lemma']=frequencyadd[0]\n", "frequencyadd=frequencyadd[['orig_order','lemma']]\n", "frequencyadd.head(5)" ] }, { "cell_type": "code", "execution_count": 429, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderlemmafreq_lemma
01βίβλος10
12γένεσις5
23Ἰησοῦς906
34Χριστός528
45υἱός375
56Δαυίδ59
67υἱός375
78Ἀβραάμ73
89Ἀβραάμ73
910γεννάω97
101119769
1112Ἰσαάκ20
1213Ἰσαάκ20
1314δέ2766
1415γεννάω97
151619769
1617Ἰακώβ27
1718Ἰακώβ27
1819δέ2766
1920γεννάω97
\n", "
" ], "text/plain": [ " orig_order lemma freq_lemma\n", "0 1 βίβλος 10\n", "1 2 γένεσις 5\n", "2 3 Ἰησοῦς 906\n", "3 4 Χριστός 528\n", "4 5 υἱός 375\n", "5 6 Δαυίδ 59\n", "6 7 υἱός 375\n", "7 8 Ἀβραάμ 73\n", "8 9 Ἀβραάμ 73\n", "9 10 γεννάω 97\n", "10 11 ὁ 19769\n", "11 12 Ἰσαάκ 20\n", "12 13 Ἰσαάκ 20\n", "13 14 δέ 2766\n", "14 15 γεννάω 97\n", "15 16 ὁ 19769\n", "16 17 Ἰακώβ 27\n", "17 18 Ἰακώβ 27\n", "18 19 δέ 2766\n", "19 20 γεννάω 97" ] }, "execution_count": 429, "metadata": {}, "output_type": "execute_result" } ], "source": [ "frequencyadd[\"freq_lemma\"]=frequencyadd.groupby([\"lemma\"])[\"lemma\"].transform(\"count\")\n", "#(\"count\") is actually utilizing the 'count' function!\n", "frequencyadd.head(20)" ] }, { "cell_type": "code", "execution_count": 430, "metadata": {}, "outputs": [], "source": [ "frequencyadd.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_freq.xlsx')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# English Dictionary" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's first load the NA1904 BibleOL dictionary:" ] }, { "cell_type": "code", "execution_count": 431, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig abc orderOccurrencesLexemeLexeme_dictStrong's numberStrong's unreliable?gloss
015ἈαρώνἈαρών, ὁ2noAaron
121ἈβαδδώνἈβαδδών, ὁ3noAbaddon
231ἀβαρήςἀβαρής, -ές4nonot burdensome
343ἀββάἀββά, ὁ5noFather
454ἍβελἍβελ, ὁ6noAbel
563ἈβιάἈβιά, ὁ7noAbijah
671ἈβιαθάρἈβιαθάρ, ὁ8noAbiathar
781ἈβιληνήἈβιληνή, -ῆς, ἡ9noAbilene
892ἈβιούδἈβιούδ, ὁ10noAbiud
91073ἈβραάμἈβραάμ, ὁ11noAbraham
10119ἄβυσσοςἄβυσσος, -ου, ἡ12noabyss, unfathomable depth
11122ἍγαβοςἍγαβος, -ου, ὁ13noAgabus
12132ἀγαθοεργέωἀγαθοεργέω14noperform good deeds
13149ἀγαθοποιέωἀγαθοποιέω15nodo that which is good
14151ἀγαθοποιΐαἀγαθοποιΐα, -ας, ἡ16nodoing of that which is good
15161ἀγαθοποιόςἀγαθοποιός, -οῦ, ὁ17noa doer of that which is good
1617102ἀγαθόςἀγαθός, -ή, -όν18nogood
17184ἀγαθωσύνηἀγαθωσύνη, -ης, ἡ19nogoodness
18195ἀγαλλίασιςἀγαλλίασις, -εως, ἡ20noexultation, exhilaration
192011ἀγαλλιάωἀγαλλιάω21noexult, am full of joy
\n", "
" ], "text/plain": [ " orig abc order Occurrences Lexeme Lexeme_dict \\\n", "0 1 5 Ἀαρών Ἀαρών, ὁ \n", "1 2 1 Ἀβαδδών Ἀβαδδών, ὁ \n", "2 3 1 ἀβαρής ἀβαρής, -ές \n", "3 4 3 ἀββά ἀββά, ὁ \n", "4 5 4 Ἅβελ Ἅβελ, ὁ \n", "5 6 3 Ἀβιά Ἀβιά, ὁ \n", "6 7 1 Ἀβιαθάρ Ἀβιαθάρ, ὁ \n", "7 8 1 Ἀβιληνή Ἀβιληνή, -ῆς, ἡ \n", "8 9 2 Ἀβιούδ Ἀβιούδ, ὁ \n", "9 10 73 Ἀβραάμ Ἀβραάμ, ὁ \n", "10 11 9 ἄβυσσος ἄβυσσος, -ου, ἡ \n", "11 12 2 Ἅγαβος Ἅγαβος, -ου, ὁ \n", "12 13 2 ἀγαθοεργέω ἀγαθοεργέω \n", "13 14 9 ἀγαθοποιέω ἀγαθοποιέω \n", "14 15 1 ἀγαθοποιΐα ἀγαθοποιΐα, -ας, ἡ \n", "15 16 1 ἀγαθοποιός ἀγαθοποιός, -οῦ, ὁ \n", "16 17 102 ἀγαθός ἀγαθός, -ή, -όν \n", "17 18 4 ἀγαθωσύνη ἀγαθωσύνη, -ης, ἡ \n", "18 19 5 ἀγαλλίασις ἀγαλλίασις, -εως, ἡ \n", "19 20 11 ἀγαλλιάω ἀγαλλιάω \n", "\n", " Strong's number Strong's unreliable? gloss \n", "0 2 no Aaron \n", "1 3 no Abaddon \n", "2 4 no not burdensome \n", "3 5 no Father \n", "4 6 no Abel \n", "5 7 no Abijah \n", "6 8 no Abiathar \n", "7 9 no Abilene \n", "8 10 no Abiud \n", "9 11 no Abraham \n", "10 12 no abyss, unfathomable depth \n", "11 13 no Agabus \n", "12 14 no perform good deeds \n", "13 15 no do that which is good \n", "14 16 no doing of that which is good \n", "15 17 no a doer of that which is good \n", "16 18 no good \n", "17 19 no goodness \n", "18 20 no exultation, exhilaration \n", "19 21 no exult, am full of joy " ] }, "execution_count": 431, "metadata": {}, "output_type": "execute_result" } ], "source": [ "BOLgreekDICT=pd.read_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/NA1904_dictionary_v1.0.xlsx')\n", "pd.set_option('display.max_columns', 50)\n", "BOLgreekDICT.head(20)" ] }, { "cell_type": "code", "execution_count": 432, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LexemeLexeme_dictStrong's numbergloss
0ἈαρώνἈαρών, ὁ2Aaron
1ἈβαδδώνἈβαδδών, ὁ3Abaddon
2ἀβαρήςἀβαρής, -ές4not burdensome
3ἀββάἀββά, ὁ5Father
4ἍβελἍβελ, ὁ6Abel
5ἈβιάἈβιά, ὁ7Abijah
6ἈβιαθάρἈβιαθάρ, ὁ8Abiathar
7ἈβιληνήἈβιληνή, -ῆς, ἡ9Abilene
8ἈβιούδἈβιούδ, ὁ10Abiud
9ἈβραάμἈβραάμ, ὁ11Abraham
\n", "
" ], "text/plain": [ " Lexeme Lexeme_dict Strong's number gloss\n", "0 Ἀαρών Ἀαρών, ὁ 2 Aaron\n", "1 Ἀβαδδών Ἀβαδδών, ὁ 3 Abaddon\n", "2 ἀβαρής ἀβαρής, -ές 4 not burdensome\n", "3 ἀββά ἀββά, ὁ 5 Father\n", "4 Ἅβελ Ἅβελ, ὁ 6 Abel\n", "5 Ἀβιά Ἀβιά, ὁ 7 Abijah\n", "6 Ἀβιαθάρ Ἀβιαθάρ, ὁ 8 Abiathar\n", "7 Ἀβιληνή Ἀβιληνή, -ῆς, ἡ 9 Abilene\n", "8 Ἀβιούδ Ἀβιούδ, ὁ 10 Abiud\n", "9 Ἀβραάμ Ἀβραάμ, ὁ 11 Abraham" ] }, "execution_count": 432, "metadata": {}, "output_type": "execute_result" } ], "source": [ "BOLgreekDICT=BOLgreekDICT[['Lexeme','Lexeme_dict', 'Strong\\'s number', 'gloss']]\n", "BOLgreekDICT.head(10)" ] }, { "cell_type": "code", "execution_count": 433, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Strong's number
count5433.000000
mean2798.407878
std1638.197697
min1.000000
25%1370.000000
50%2754.000000
75%4237.000000
max5624.000000
\n", "
" ], "text/plain": [ " Strong's number\n", "count 5433.000000\n", "mean 2798.407878\n", "std 1638.197697\n", "min 1.000000\n", "25% 1370.000000\n", "50% 2754.000000\n", "75% 4237.000000\n", "max 5624.000000" ] }, "execution_count": 433, "metadata": {}, "output_type": "execute_result" } ], "source": [ "BOLgreekDICT.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's load the SBLGNT lemmas:" ] }, { "cell_type": "code", "execution_count": 434, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
0βίβλος
1γένεσις
\n", "
" ], "text/plain": [ " 0\n", "0 βίβλος\n", "1 γένεσις" ] }, "execution_count": 434, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SBLGNTlemmas=pd.read_csv('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/lemma_copy.tf',header=None, delimiter='\\t',encoding='utf-8')\n", "pd.set_option('display.max_columns', 50)\n", "SBLGNTlemmas.head(2)" ] }, { "cell_type": "code", "execution_count": 436, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderLexeme
01βίβλος
12γένεσις
23Ἰησοῦς
34Χριστός
45υἱός
\n", "
" ], "text/plain": [ " orig_order Lexeme\n", "0 1 βίβλος\n", "1 2 γένεσις\n", "2 3 Ἰησοῦς\n", "3 4 Χριστός\n", "4 5 υἱός" ] }, "execution_count": 436, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SBLGNTlemmas['orig_order']=SBLGNTlemmas.index +1\n", "SBLGNTlemmas['Lexeme']=SBLGNTlemmas[0]\n", "SBLGNTlemmas=SBLGNTlemmas[['orig_order','Lexeme']]\n", "SBLGNTlemmas.head(5)" ] }, { "cell_type": "code", "execution_count": 437, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_order
count137554.000000
mean68777.500000
std39708.563801
min1.000000
25%34389.250000
50%68777.500000
75%103165.750000
max137554.000000
\n", "
" ], "text/plain": [ " orig_order\n", "count 137554.000000\n", "mean 68777.500000\n", "std 39708.563801\n", "min 1.000000\n", "25% 34389.250000\n", "50% 68777.500000\n", "75% 103165.750000\n", "max 137554.000000" ] }, "execution_count": 437, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SBLGNTlemmas.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now let's try a merge of the two files:" ] }, { "cell_type": "code", "execution_count": 438, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderLexemeLexeme_dictStrong's numbergloss
01.0βίβλοςNaNNaNNaN
126440.0βίβλοςNaNNaNNaN
231717.0βίβλοςNaNNaNNaN
345660.0βίβλοςNaNNaNNaN
464886.0βίβλοςNaNNaNNaN
\n", "
" ], "text/plain": [ " orig_order Lexeme Lexeme_dict Strong's number gloss\n", "0 1.0 βίβλος NaN NaN NaN\n", "1 26440.0 βίβλος NaN NaN NaN\n", "2 31717.0 βίβλος NaN NaN NaN\n", "3 45660.0 βίβλος NaN NaN NaN\n", "4 64886.0 βίβλος NaN NaN NaN" ] }, "execution_count": 438, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SBLGNTglosses=pd.merge (SBLGNTlemmas,BOLgreekDICT,\n", " on='Lexeme',\n", " how='outer')\n", "SBLGNTglosses.head(5)" ] }, { "cell_type": "code", "execution_count": 439, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderStrong's number
count138318.00000054761.000000
mean68756.5903793008.488377
std39712.5326781209.032138
min1.0000001.000000
25%34366.2500002041.000000
50%68753.5000003588.000000
75%103162.7500003706.000000
max137554.0000005624.000000
\n", "
" ], "text/plain": [ " orig_order Strong's number\n", "count 138318.000000 54761.000000\n", "mean 68756.590379 3008.488377\n", "std 39712.532678 1209.032138\n", "min 1.000000 1.000000\n", "25% 34366.250000 2041.000000\n", "50% 68753.500000 3588.000000\n", "75% 103162.750000 3706.000000\n", "max 137554.000000 5624.000000" ] }, "execution_count": 439, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SBLGNTglosses.describe()" ] }, { "cell_type": "code", "execution_count": 440, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orig_orderLexemeLexeme_dictStrong's numbergloss
01.0βίβλοςNaNNaNNaN
126440.0βίβλοςNaNNaNNaN
231717.0βίβλοςNaNNaNNaN
345660.0βίβλοςNaNNaNNaN
464886.0βίβλοςNaNNaNNaN
568873.0βίβλοςNaNNaNNaN
676865.0βίβλοςNaNNaNNaN
7107214.0βίβλοςNaNNaNNaN
8128928.0βίβλοςNaNNaNNaN
9136490.0βίβλοςNaNNaNNaN
102.0γένεσιςNaNNaNNaN
11281.0γένεσιςNaNNaNNaN
1229821.0γένεσιςNaNNaNNaN
13120472.0γένεσιςNaNNaNNaN
14121080.0γένεσιςNaNNaNNaN
153.0ἸησοῦςἸησοῦς2424.0Jesus
16243.0ἸησοῦςἸησοῦς2424.0Jesus
17278.0ἸησοῦςἸησοῦς2424.0Jesus
18357.0ἸησοῦςἸησοῦς2424.0Jesus
19436.0ἸησοῦςἸησοῦς2424.0Jesus
\n", "
" ], "text/plain": [ " orig_order Lexeme Lexeme_dict Strong's number gloss\n", "0 1.0 βίβλος NaN NaN NaN\n", "1 26440.0 βίβλος NaN NaN NaN\n", "2 31717.0 βίβλος NaN NaN NaN\n", "3 45660.0 βίβλος NaN NaN NaN\n", "4 64886.0 βίβλος NaN NaN NaN\n", "5 68873.0 βίβλος NaN NaN NaN\n", "6 76865.0 βίβλος NaN NaN NaN\n", "7 107214.0 βίβλος NaN NaN NaN\n", "8 128928.0 βίβλος NaN NaN NaN\n", "9 136490.0 βίβλος NaN NaN NaN\n", "10 2.0 γένεσις NaN NaN NaN\n", "11 281.0 γένεσις NaN NaN NaN\n", "12 29821.0 γένεσις NaN NaN NaN\n", "13 120472.0 γένεσις NaN NaN NaN\n", "14 121080.0 γένεσις NaN NaN NaN\n", "15 3.0 Ἰησοῦς Ἰησοῦς 2424.0 Jesus\n", "16 243.0 Ἰησοῦς Ἰησοῦς 2424.0 Jesus\n", "17 278.0 Ἰησοῦς Ἰησοῦς 2424.0 Jesus\n", "18 357.0 Ἰησοῦς Ἰησοῦς 2424.0 Jesus\n", "19 436.0 Ἰησοῦς Ἰησοῦς 2424.0 Jesus" ] }, "execution_count": 440, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SBLGNTglosses.head(20)" ] }, { "cell_type": "code", "execution_count": 441, "metadata": {}, "outputs": [], "source": [ "SBLGNTglosses.to_excel('d:/OneDrive/1200_AUS-research/Fabric-TEXT/0_data_SBLGNT/feature-dev/SBLGNTglosses.xlsx')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": { "height": "calc(100% - 180px)", "left": "10px", "top": "150px", "width": "165px" }, "toc_section_display": true, "toc_window_display": 
true } }, "nbformat": 4, "nbformat_minor": 4 }