{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import re\n", "from tf.fabric import Fabric\n", "from tf.app import use" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This is Text-Fabric 7.8.2\n", "Api reference : https://annotation.github.io/text-fabric/Api/Fabric/\n", "\n", "10 features found and 0 ignored\n" ] } ], "source": [ "# TF data directory of the banks corpus, version 0.2, below the home directory.\n", "TF = Fabric(locations='~/github/annotation/banks/tf/0.2')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "allFeatures = TF.explore(silent=True, show=True)\n", "loadableFeatures = allFeatures['nodes'] + allFeatures['edges']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We load all features:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.00s loading features ...\n", " 0.02s All features loaded/computed - for details use loadLog()\n" ] } ], "source": [ "api = TF.load(loadableFeatures)\n", "docs = api.makeAvailableIn(globals())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = '''\n", "line\n", ".number=number. 
sentence\n", "'''\n", "results = list(S.search(query))\n", "len(results)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "line 103 has number 1 = 1 of sentence 115\n", "line 110 has number 1 = 1 of sentence 115\n", "line 104 has number 2 = 2 of sentence 116\n", "line 111 has number 2 = 2 of sentence 116\n", "line 103 has number 1 = 1 of sentence 117\n", "line 110 has number 1 = 1 of sentence 117\n" ] } ], "source": [ "for (l, s) in results:\n", " print(f'{F.otype.v(l)} {l} has number {F.number.v(l)} = {F.number.v(s)} of {F.otype.v(s)} {s}')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(0, '.number=number.', 1)]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "S.exe.qedgesRaw" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This is Text-Fabric 7.8.2\n", "Api reference : https://annotation.github.io/text-fabric/Api/Fabric/\n", "\n", "7 features found and 0 ignored\n", " 0.00s Warp feature \"otext\" not found. 
Working without Text-API\n", "\n" ] } ], "source": [ "# Test data under text-fabric/test/generic; '~' instead of a user-specific absolute path,\n", "# in the same way as the first Fabric(...) call in this notebook.\n", "TF = Fabric(locations='~/github/annotation/text-fabric/test/generic/tf')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "allFeatures = TF.explore(silent=True, show=True)\n", "loadableFeatures = allFeatures['nodes'] + allFeatures['edges']" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.00s loading features ...\n", " | 0.00s No section config in otext, the section part of the T-API cannot be used\n", " | 0.00s No structure info in otext, the structure part of the T-API cannot be used\n", " 0.02s All features loaded/computed - for details use loadLog()\n" ] } ], "source": [ "api = TF.load(loadableFeatures)\n", "docs = api.makeAvailableIn(globals())" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(('special', 1), ('ss-peculiar-ss', 1), ('ss-special-ss', 1))" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "F.namesign.freqList()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "query = '''\n", "part name=s1\n", ".namepart=namesign. sign name=a\n", "'''" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(11, 1)]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results = list(S.search(query))\n", "results" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "query = '''\n", "sign name=a\n", ".namesign=namepart. 
part name=s1\n", "'''" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results = list(S.search(query))\n", "results" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "query = '''\n", "sign name=b\n", ".namesign~(^[sp][sp]-)|(-[sp][sp]$)~namepart. part name=s2\n", "'''" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "b\n", "ss-special-ss\n", "special\n", "s2\n", "pp-special-pp\n", "special\n" ] } ], "source": [ "r = re.compile(r'(^[sp][sp]-)|(-[sp][sp]$)')\n", "print(F.name.v(2))\n", "print(F.namesign.v(2))\n", "print(r.sub('', F.namesign.v(2)))\n", "print(F.name.v(12))\n", "print(F.namepart.v(12))\n", "print(r.sub('', F.namepart.v(12)))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(2, 12)]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results = list(S.search(query))\n", "results" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "query = '''\n", "sign name=b\n", ".namesign=namesign. sign name=b\n", "'''" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results = list(S.search(query))\n", "results" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "query = '''\n", "sign name=b\n", ".namesign~(^[sp]{2}-)|(-[sp]{2}$)~namepart. 
part name=s2\n", "'''" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(2, 12)]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results = list(S.search(query))\n", "results" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "query = '''\n", "sign\n", ".number~[A-Z]~name. sign\n", ".number3\n", "'''" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0 \n", " 1 sign name>3\n", " 2 \n", "Feature \"name\" has wrong values:\n", " \". at 0x7fe28891d378>\" is not a number: line(s) 1\n" ] }, { "data": { "text/plain": [ "[]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results = list(S.search(query))\n", "results" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict_items([(1, 1), (2, 2), (3, 3), (4, 4), (11, 1), (12, 2), (13, 3), (14, 4), (15, 5), (16, 6), (17, 7), (18, 8), (19, 9)])" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "F.number.data.items()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dict_items([(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h'), (9, 'i'), (10, 'j'), (11, 's1'), (12, 's2'), (13, 's3'), (14, 's4'), (15, 's5'), (16, 's6'), (17, 's7'), (18, 's8'), (19, 's9'), (20, 's10'), (21, 'ss1'), (22, 'ss2'), (23, 'ss3'), (24, 'ss4'), (25, 'ss5'), (26, 'ss6'), (27, 'ss7'), (28, 'ss8'), (29, 'ss9'), (30, 'ss10'), (31, 'd1'), (32, 'd2'), (33, 'd3'), (34, 'd4'), (35, 'd5'), (36, 't1'), (37, 't2'), (38, 't3'), (39, 't4'), (40, 'q1'), (41, 'q2'), (42, 'q3'), (43, 'u1'), (44, 'u2'), (45, 'lower_a'), (46, 'lower_b'), (47, 'lower_c'), (48, 'upper_a'), (49, 'upper_b'), (50, 'upper_c'), 
(51, 'lower'), (52, 'upper'), (53, 'odd'), (54, 'even'), (55, 'big'), (56, 'small1'), (57, 'small2'), (58, 'small3'), (59, 'small4'), (60, 'small5'), (61, 'small6'), (62, 'small7'), (63, 'small8'), (64, 'john'), (65, 'mary'), (66, 'fred'), (67, 'jim'), (68, 'jim1'), (69, 'jim2'), (70, 'jim3'), (71, 'tim'), (72, 'tom'), (73, 'tom1n'), (74, 'tom1p'), (75, 'tom2n'), (76, 'tom2p'), (77, 'timb'), (78, 'tomb'), (79, 'tomb1n'), (80, 'tomb1p'), (81, 'tomb2n'), (82, 'tomb2p'), (83, 'time'), (84, 'tome'), (85, 'tome1n'), (86, 'tome1p'), (87, 'tome2n'), (88, 'tome2p'), (89, 'all')])" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "F.name.data.items()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "TF-app: ~/text-fabric-data/annotation/app-bhsa/code" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/text-fabric-data/etcbc/bhsa/tf/2021" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/text-fabric-data/etcbc/phono/tf/2021" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/text-fabric-data/etcbc/parallels/tf/2021" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "This is Text-Fabric 9.1.5\n", "Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html\n", "\n", "122 features found and 0 ignored\n" ] }, { "data": { "text/html": [ "Text-Fabric: Text-Fabric API 9.1.5, app-bhsa v3, Search Reference
Data: BHSA, Character table, Feature docs
Features:
Parallel Passagescrossref
BHSA = Biblia Hebraica Stuttgartensia Amstelodamensisbook
book@ll
chapter
code
det
domain
freq_lex
function
g_cons
g_cons_utf8
g_lex
g_lex_utf8
g_word
g_word_utf8
gloss
gn
label
language
lex
lex_utf8
ls
nametype
nme
nu
number
otype
pargr
pdp
pfm
prs
prs_gn
prs_nu
prs_ps
ps
qere
qere_trailer
qere_trailer_utf8
qere_utf8
rank_lex
rela
sp
st
tab
trailer
trailer_utf8
txt
typ
uvf
vbe
vbs
verse
voc_lex
voc_lex_utf8
vs
vt
mother
omap@ll
oslots
Phonetic Transcriptionsphono
phono_trailer
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Text-Fabric API: names N F E L T S C TF directly usable

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "A = use('bhsa', hoist=globals())" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "query = \"\"\"\n", "verse book=Genesis chapter=1\n", " c1:clause number=2\n", " p1:phrase\n", "\n", "c1 .number>number. p1\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.34s 20 results\n" ] } ], "source": [ "results = A.search(query)" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/html": [ "

result 1

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
verse
book=Genesischapter=1
sentence
number=8
clause
number=1
phrase
number=1
number=40
phrase
number=2
phrase
number=3
phrase
number=4
number=43
number=44
number=45
clause
number=2
phrase
number=1
number=46
phrase
number=2
number=47
sentence
number=9
clause
number=1
phrase
number=1
number=48
phrase
number=2
phrase
number=3
phrase
number=4
number=51
number=52
number=53
number=54
number=55
number=56
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "A.show(results, start=1, end=1)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "query = '''\n", "c:clause\n", " PreGap:phrase_atom\n", " LastPhrase:phrase_atom\n", " :=\n", "\n", "Gap:clause_atom\n", " :: word\n", "\n", "PreGap < Gap\n", "Gap < LastPhrase\n", "c || Gap\n", "'''" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.00s Checking search template ...\n", " 0.00s Setting up search space for 5 objects ...\n", " 0.19s Constraining search space with 8 relations ...\n", " 0.64s \t2 edges thinned\n", " 0.64s Setting up retrieval plan ...\n", " 0.65s Ready to deliver results from 454173 nodes\n", "Iterate over S.fetch() to get the results\n", "See S.showPlan() to interpret the results\n" ] } ], "source": [ "S.study(query)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Search with 5 objects and 8 relations\n", "Results are instantiations of the following objects:\n", "node 0-clause ( 88121 choices)\n", "node 1-phrase_atom (267541 choices)\n", "node 2-phrase_atom ( 88121 choices)\n", "node 3-clause_atom ( 5195 choices)\n", "node 4-word ( 5195 choices)\n", "Performance parameters:\n", "\tyarnRatio = 1.25\n", "\ttryLimitFrom = 40\n", "\ttryLimitTo = 40\n", "Instantiations are computed along the following relations:\n", "node 3-clause_atom ( 5195 choices)\n", "edge 3-clause_atom :: 4-word ( 1.0 choices (thinned))\n", "edge 4-word ]] 3-clause_atom ( 1.0 choices)\n", "edge 3-clause_atom < 2-phrase_atom ( 44060.5 choices)\n", "edge 2-phrase_atom := 0-clause ( 1.0 choices (thinned))\n", "edge 0-clause [[ 2-phrase_atom ( 1.0 choices)\n", "edge 0-clause || 3-clause_atom ( 4675.5 choices)\n", "edge 0-clause [[ 1-phrase_atom ( 3.0 choices)\n", "edge 1-phrase_atom 
< 3-clause_atom ( 2597.5 choices)\n", " 14s The results are connected to the original search template as follows:\n", " 0 \n", " 1 R0 c:clause\n", " 2 R1 PreGap:phrase_atom\n", " 3 R2 LastPhrase:phrase_atom\n", " 4 :=\n", " 5 \n", " 6 R3 Gap:clause_atom\n", " 7 R4 :: word\n", " 8 \n", " 9 PreGap < Gap\n", "10 Gap < LastPhrase\n", "11 c || Gap\n", "12 \n" ] } ], "source": [ "S.showPlan(details=True)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.00s Counting results per 1 up to 4 ...\n", " | 3.76s 1\n", " | 10s 2\n", " | 32s 3\n", " | 33s 4\n", " 33s Done: 4 results\n" ] } ], "source": [ "S.count(progress=1, limit=4)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 }