{ "cells": [ { "cell_type": "code", "execution_count": 4, "id": "9819d1fc-cd0e-4528-a911-642194b3b5ab", "metadata": {}, "outputs": [], "source": [ "from tf.app import collect" ] }, { "cell_type": "code", "execution_count": 13, "id": "2a61998f-4d24-4ef1-a789-4b00ea9d7fa3", "metadata": {}, "outputs": [], "source": [ "# Corpora to process, keyed by a short label.\n", "# Each value is a (backend, org, repo) triple.\n", "# Comment entries in or out to select which corpora are active.\n", "corpora = dict(\n", "    # Descartes=(\"github\", \"CLARIAH\", \"descartes-tf\"),\n", "    # FerdinandHuyck=(\"github\", \"CLARIAH\", \"wp6-ferdinandhuyck\"),\n", "    # Missieven=(\"github\", \"CLARIAH\", \"wp6-missieven\"),\n", "    # Daghregisters=(\"github\", \"CLARIAH\", \"wp6-daghregisters\"),\n", "    # BHSA=(\"github\", \"ETCBC\", \"bhsa\"),\n", "    # DSS=(\"github\", \"ETCBC\", \"dss\"),\n", "    # Dhammapada=(\"github\", \"ETCBC\", \"dhammapada\"),\n", "    # N1904=(\"github\", \"ETCBC\", \"nestle1904\"),\n", "    # Peshitta=(\"github\", \"ETCBC\", \"peshitta\"),\n", "    # SyrNT=(\"github\", \"ETCBC\", \"syrnt\"),\n", "    # NinMed=(\"github\", \"Nino-cunei\", \"ninmed\"),\n", "    # OldBabylonian=(\"github\", \"Nino-cunei\", \"oldbabylonian\"),\n", "    # OldAssyrian=(\"github\", \"Nino-cunei\", \"oldassyrian\"),\n", "    # Uruk=(\"github\", \"Nino-cunei\", \"uruk\"),\n", "    Athenaeus=(\"github\", \"pthu\", \"athenaeus\"),\n", "    # Quran=(\"github\", \"q-ran\", \"quran\"),\n", "    Fusus=(\"github\", \"among\", \"fusus\"),\n", ")\n", "\n", "# Additional corpora, in the same (backend, org, repo) shape as `corpora`.\n", "# Every entry carries the backend explicitly so all values unpack the same way.\n", "otherCorpora = dict(\n", "    LXX=(\"github\", \"CenterBLC\", \"LXX\"),\n", "    NA=(\"github\", \"CenterBLC\", \"NA\"),\n", "    SBLGNT=(\"github\", \"CenterBLC\", \"SBLGNT\"),\n", "    Tischendorf=(\"github\", \"codykingham\", \"tischendorf_tf\"),\n", "    SamaritanPentateuch=(\"github\", \"DT-UCPH\", \"sp\"),\n", "    Ugaritic=(\"github\", \"DT-UCPH\", \"cuc\"),\n", ")" ] }, { "cell_type": "code", "execution_count": 22, "id": "62e895ba-b2f8-4d11-90ff-ac1c068a07ca", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "=== Descartes ===>\n" ] }, { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": 
"display_data" }, { "data": { "text/html": [ "app: ~/github/CLARIAH/descartes-tf/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/CLARIAH/descartes-tf/tf/1.1" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/CLARIAH/descartes-tf/parallels/tf/1.1" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " TF: TF API 12.5.4, CLARIAH/descartes-tf/app v3, Search Reference
\n", " Data: CLARIAH - descartes-tf 1.1, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
volume885241.88100
letter725940.60100
page2884236.45100
postscriptum5646.790
opener5451.970
closer54113.101
address8615.220
head72523.372
p843880.82100
sentence1307450.1496
hi59724.634
formula62001.211
figure3191.000
word6819351.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
CLARIAH/descartes-tf/parallels/tf\n", "
\n", "\n", "
\n", "
\n", "sim\n", "
\n", "
int
\n", "\n", " similarity between sentences based on the Levenshtein ratio\n", "\n", "
\n", "\n", "
\n", "
\n", "\n", "
Descartes = Descartes, all letters\n", "
\n", "\n", "
\n", "
\n", "alt_date\n", "
\n", "
str
\n", "\n", " alternative date of a letter\n", "\n", "
\n", "\n", "
\n", "
\n", "alt_id\n", "
\n", "
str
\n", "\n", " alternative ids of a letter, comma separated\n", "\n", "
\n", "\n", "
\n", "
\n", "cert\n", "
\n", "
str
\n", "\n", " certainty of something\n", "\n", "
\n", "\n", "
\n", "
\n", "date\n", "
\n", "
str
\n", "\n", " date of a letter\n", "\n", "
\n", "\n", "
\n", "
\n", "id\n", "
\n", "
str
\n", "\n", " id of a letter\n", "\n", "
\n", "\n", "
\n", "
\n", "intermediary\n", "
\n", "
str
\n", "\n", " person involved in the transmission of the letter from sender to receiver\n", "\n", "
\n", "\n", "
\n", "
\n", "isitalic\n", "
\n", "
str
\n", "\n", " whether the word is in italic\n", "\n", "
\n", "\n", "
\n", "
\n", "ismargin\n", "
\n", "
str
\n", "\n", " whether the word is in the margin\n", "\n", "
\n", "\n", "
\n", "
\n", "issub\n", "
\n", "
str
\n", "\n", " whether the word is in subscript\n", "\n", "
\n", "\n", "
\n", "
\n", "issup\n", "
\n", "
str
\n", "\n", " whether the word is in supscript\n", "\n", "
\n", "\n", "
\n", "
\n", "language\n", "
\n", "
str
\n", "\n", " language of a letter\n", "\n", "
\n", "\n", "
\n", "
\n", "level\n", "
\n", "
str
\n", "\n", " level of a paragraph when it acts like a heading\n", "\n", "
\n", "\n", "
\n", "
\n", "n\n", "
\n", "
int
\n", "\n", " number of whatever element\n", "\n", "
\n", "\n", "
\n", "
\n", "notation\n", "
\n", "
str
\n", "\n", " notation method of a formula\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "punc\n", "
\n", "
str
\n", "\n", " nonword chars after a word \n", "\n", "
\n", "\n", "
\n", "
\n", "recipient\n", "
\n", "
str
\n", "\n", " recipient of a letter\n", "\n", "
\n", "\n", "
\n", "
\n", "recipientloc\n", "
\n", "
str
\n", "\n", " location from where a letter was received\n", "\n", "
\n", "\n", "
\n", "
\n", "resp\n", "
\n", "
str
\n", "\n", " person responsible for something\n", "\n", "
\n", "\n", "
\n", "
\n", "sender\n", "
\n", "
str
\n", "\n", " sender of a letter\n", "\n", "
\n", "\n", "
\n", "
\n", "senderloc\n", "
\n", "
str
\n", "\n", " location from where a letter was sent\n", "\n", "
\n", "\n", "
\n", "
\n", "tex\n", "
\n", "
str
\n", "\n", " unformatted TeX code of a formula, without the `$`\n", "\n", "
\n", "\n", "
\n", "
\n", "trans\n", "
\n", "
str
\n", "\n", " transcription of a word \n", "\n", "
\n", "\n", "
\n", "
\n", "typ\n", "
\n", "
str
\n", "\n", " kind of a node; \"empty\"; \"formula\", \"head\", \"symbol\", \"illustration\"\n", "\n", "
\n", "\n", "
\n", "
\n", "url\n", "
\n", "
str
\n", "\n", " url of a graphic node\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: CLARIAH/descartes-tf
  3. appPath: /Users/me/github/CLARIAH/descartes-tf/app
  4. commit: no value
  5. css:.bold {
    font-weight: bold;
    }
    .italic {
    font-style: italic;
    }
    .margin {
    position: relative;
    top: -0.3em;
    font-weight: bold;
    color: #0000ee;
    }
    .sub {
    vertical-align: sub;
    font-size: small;
    }
    .sup {
    vertical-align: super;
    font-size: small;
    }
  6. dataDisplay:
    • exampleSectionHtml: <code>letter 1:1001</code>
    • textFormats: {layout-orig-full: {method: layoutOrig}}
  7. docs:
    • docPage: about
    • featureBase:https://github.com/{org}/{repo}/blob/main/docs/transcription{docExt}
    • featurePage: ''
  8. interfaceDefaults:
    • showGraphics: True
    • showMath: True
    • standardFeatures: 0
    • withLabels: True
  9. isCompatible: True
  10. local: clone
  11. localDir: /Users/me/github/CLARIAH/descartes-tf/_temp
  12. provenanceSpec:
    • corpus: Descartes = Descartes, all letters
    • graphicsRelative: source/illustrations
    • moduleSpecs:
    • \n", " :\n", "
      • backend: no value
      • corpus: Similar Sentences
      • docUrl: no value
      • doi: no value
      • org: CLARIAH
      • relative: parallels/tf
      • repo: descartes-tf
      \n", "
    • org: CLARIAH
    • relative: /tf
    • repo: descartes-tf
    • version: 1.1
    • webBase:http://emlo-portal.bodleian.ox.ac.uk/collections/?catalogue=rene-descartes
    • webHint: See how this corpus is included in the Bodleian catalog
  13. release: no value
  14. typeDisplay:
    • figure:
      • features: url
      • graphics: True
    • formula:
      • features: tex
      • label: {notation}
    • letter:
      • features: senderloc recipientloc
      • label: {id} {date} from {sender} to {recipient}
      • template: {id} {date} from {sender} to {recipient}
    • p:
      • condense: True
      • label: {n}
    • page:
      • label: {n}
      • template: p. {n}
    • sentence: {label: {n}}
    • volume:
      • label: {n}
      • template: vol. {n}
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/CLARIAH/descartes-tf/source/illustrations" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Found 5 symbols
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Found 310 illustrations
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Data to be zipped:\n", "\tOK app (v1.1 285d31) : ~/github/CLARIAH/descartes-tf/app\n", "\tOK main data (v1.1 285d31) : ~/github/CLARIAH/descartes-tf/tf/1.1\n", "\tOK graphics (v1.1 285d31) : ~/github/CLARIAH/descartes-tf/source/illustrations\n", "\tOK module /parallels/tf (v1.1 285d31) : ~/github/CLARIAH/descartes-tf/parallels/tf/1.1\n", "Writing zip file ...\n", " 0.51s ~/Downloads/github/CLARIAH/descartes-tf/complete.zip\n", "=== FerdinandHuyck ===>\n" ] }, { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "app: ~/github/CLARIAH/wp6-ferdinandhuyck/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/CLARIAH/wp6-ferdinandhuyck/tf/0.1" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " TF: TF API 12.5.4, CLARIAH/wp6-ferdinandhuyck/app v3, Search Reference
\n", " Data: CLARIAH - wp6-ferdinandhuyck 0.1, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", 
"\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
text1218025.00100
body1218018.00100
div425190.90100
chapter444963.18100
fileDesc1299.000
editionStmt1268.000
p372558.0499
chunk383356.93100
lg4123.340
ebook121.000
pod121.000
note920.890
sourceDesc116.000
bibl213.000
revisionDesc112.000
q279.040
head868.400
titleStmt18.000
l1227.800
interpGrp17.000
change26.000
publicationStmt15.000
title35.000
item24.000
hi6023.501
author33.000
imprint23.000
encodingDesc12.000
notesStmt12.000
order22.000
availability31.670
name2681.210
idno91.110
blurb21.000
colofon21.000
date41.000
figure51.000
interp71.000
price21.000
pubPlace21.000
publisher21.000
respStmt21.000
titlepage21.000
xptr51.000
word2183801.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
CLARIAH - wp6-ferdinandhuyck\n", "
\n", "\n", "
\n", "
\n", "after\n", "
\n", "
str
\n", "\n", " the text after a word till the next word\n", "\n", "
\n", "\n", "
\n", "
\n", "chapter\n", "
\n", "
str
\n", "\n", " name of chapter\n", "\n", "
\n", "\n", "
\n", "
\n", "chunk\n", "
\n", "
int
\n", "\n", " number of a chunk within a file\n", "\n", "
\n", "\n", "
\n", "
\n", "curr\n", "
\n", "
str
\n", "\n", " this is TEI attribute curr\n", "\n", "
\n", "\n", "
\n", "
\n", "empty\n", "
\n", "
int
\n", "\n", " whether a slot has been inserted in an empty element\n", "\n", "
\n", "\n", "
\n", "
\n", "empty_lb\n", "
\n", "
int
\n", "\n", " empty TEI element lb follows\n", "\n", "
\n", "\n", "
\n", "
\n", "empty_link\n", "
\n", "
int
\n", "\n", " empty TEI element link follows\n", "\n", "
\n", "\n", "
\n", "
\n", "empty_pb\n", "
\n", "
int
\n", "\n", " empty TEI element pb follows\n", "\n", "
\n", "\n", "
\n", "
\n", "empty_pb_n\n", "
\n", "
str
\n", "\n", " TEI attribute n of empty element pb\n", "\n", "
\n", "\n", "
\n", "
\n", "is_meta\n", "
\n", "
str
\n", "\n", " whether a slot or word is in the teiHeader element\n", "\n", "
\n", "\n", "
\n", "
\n", "is_note\n", "
\n", "
str
\n", "\n", " whether a slot or word is in the note element\n", "\n", "
\n", "\n", "
\n", "
\n", "n\n", "
\n", "
str
\n", "\n", " this is TEI attribute n\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "place\n", "
\n", "
str
\n", "\n", " this is TEI attribute place\n", "\n", "
\n", "\n", "
\n", "
\n", "rend\n", "
\n", "
str
\n", "\n", " this is TEI attribute rend\n", "\n", "
\n", "\n", "
\n", "
\n", "rend_1tab\n", "
\n", "
int
\n", "\n", " whether text is to be rendered as 1tab\n", "\n", "
\n", "\n", "
\n", "
\n", "rend_b\n", "
\n", "
int
\n", "\n", " whether text is to be rendered as b\n", "\n", "
\n", "\n", "
\n", "
\n", "rend_bq\n", "
\n", "
int
\n", "\n", " whether text is to be rendered as bq\n", "\n", "
\n", "\n", "
\n", "
\n", "rend_h2\n", "
\n", "
int
\n", "\n", " whether text is to be rendered as h2\n", "\n", "
\n", "\n", "
\n", "
\n", "rend_h3\n", "
\n", "
int
\n", "\n", " whether text is to be rendered as h3\n", "\n", "
\n", "\n", "
\n", "
\n", "rend_h4\n", "
\n", "
int
\n", "\n", " whether text is to be rendered as h4\n", "\n", "
\n", "\n", "
\n", "
\n", "rend_i\n", "
\n", "
int
\n", "\n", " whether text is to be rendered as i\n", "\n", "
\n", "\n", "
\n", "
\n", "rend_sc\n", "
\n", "
int
\n", "\n", " whether text is to be rendered as sc\n", "\n", "
\n", "\n", "
\n", "
\n", "rend_spat\n", "
\n", "
int
\n", "\n", " whether text is to be rendered as spat\n", "\n", "
\n", "\n", "
\n", "
\n", "rend_sup\n", "
\n", "
int
\n", "\n", " whether text is to be rendered as sup\n", "\n", "
\n", "\n", "
\n", "
\n", "str\n", "
\n", "
str
\n", "\n", " the text of a word\n", "\n", "
\n", "\n", "
\n", "
\n", "to\n", "
\n", "
str
\n", "\n", " this is TEI attribute to\n", "\n", "
\n", "\n", "
\n", "
\n", "type\n", "
\n", "
str
\n", "\n", " this is TEI attribute type\n", "\n", "
\n", "\n", "
\n", "
\n", "value\n", "
\n", "
str
\n", "\n", " this is TEI attribute value\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: CLARIAH/wp6-ferdinandhuyck
  3. appPath: /Users/me/github/CLARIAH/wp6-ferdinandhuyck/app
  4. commit: no value
  5. css:.r_.r_italic,.r_.r_italics {
    font-style: italic;
    color: #000000;
    }
    .r_.r_bold {
    font-weight: bold;
    color: #000000;
    }
    .r_.r_underline {
    text-decoration: underline;
    color: #000000;
    }
    .r_.r_center {
    text-align: center;
    color: #000000;
    }
    .r_.r_large {
    font-size: large;
    color: #000000;
    }
    .r_.r_spaced {
    letter-spacing: .2rem;
    color: #000000;
    }
    .r_.r_margin {
    position: relative;
    top: -0.3em;
    font-weight: bold;
    color: #0000ee;
    }
    .r_.r_above {
    position: relative;
    top: -0.3em;
    color: #000000;
    }
    .r_.r_below {
    position: relative;
    top: 0.3em;
    color: #000000;
    }
    .r_.r_sub {
    vertical-align: sub;
    font-size: small;
    color: #000000;
    }
    .r_.r_sup, .r_.r_super {
    vertical-align: super;
    font-size: small;
    color: #000000;
    }
    .r_ {
    color: #dd9900;
    }
    .is_meta {
    font-family: monospace;
    color: #008800;
    }
    .is_note {
    font-size: small;
    color: #dd0055;
    }
  6. dataDisplay:
    • excludedFeatures: []
    • noneValues:
      • none
      • unknown
      • no value
      • NA
    • sectionSep1: @
    • textFormats: {layout-orig-full: {method: layout}}
  7. docs:
    • docPage: about
    • featureBase: {docBase}/transcription.md
    • featurePage: transcription
  8. interfaceDefaults: {fmt: layout-orig-full}
  9. isCompatible: True
  10. local: clone
  11. localDir: /Users/me/github/CLARIAH/wp6-ferdinandhuyck/_temp
  12. provenanceSpec:
    • branch: main
    • corpus: {org} - {repo}
    • doi: 10.5281/zenodo.nnnnnn
    • moduleSpecs: []
    • org: CLARIAH
    • relative: /tf
    • repo: wp6-ferdinandhuyck
    • version: 0.1
    • webBase: https://public.{org}.org/{repo}
    • webHint: Show this on the website
    • webLang: en
    • webUrl:{webBase}/<1>/<2>/<3>&version={version}
    • webUrlLex: {webBase}/word?version={version}&id=<lid>
  13. release: no value
  14. typeDisplay: {word: {base: True}}
  15. writing: ''
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Data to be zipped:\n", "\tOK app (v0.1 c11480) : ~/github/CLARIAH/wp6-ferdinandhuyck/app\n", "\tOK main data (v0.1 c11480) : ~/github/CLARIAH/wp6-ferdinandhuyck/tf/0.1\n", "Writing zip file ...\n", " 0.15s ~/Downloads/github/CLARIAH/wp6-ferdinandhuyck/complete.zip\n", "=== Daghregisters ===>\n" ] }, { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "app: ~/github/CLARIAH/wp6-daghregisters/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/CLARIAH/wp6-daghregisters/tf/0.1" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " TF: TF API 12.5.4, CLARIAH/wp6-daghregisters/app v3, Search Reference
\n", " Data: CLARIAH - wp6-daghregisters 0.1, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
volume1229055.00100
page483474.23100
para419354.63100
line2029111.29100
word2290551.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
Dagh Registers Dutch East India Company 1640-1641\n", "
\n", "\n", "
\n", "
\n", "dayfrom\n", "
\n", "
int
\n", "\n", " first day covered by a page\n", "\n", "
\n", "\n", "
\n", "
\n", "dayto\n", "
\n", "
int
\n", "\n", " last day covered by a page\n", "\n", "
\n", "\n", "
\n", "
\n", "head\n", "
\n", "
str
\n", "\n", " raw contents of the header line of each page\n", "\n", "
\n", "\n", "
\n", "
\n", "letters\n", "
\n", "
str
\n", "\n", " text string of a word without punctuation\n", "\n", "
\n", "\n", "
\n", "
\n", "letterx\n", "
\n", "
str
\n", "\n", " word with split points marked by ┼\n", "\n", "
\n", "\n", "
\n", "
\n", "month\n", "
\n", "
int
\n", "\n", " month covered by a page\n", "\n", "
\n", "\n", "
\n", "
\n", "n\n", "
\n", "
int
\n", "\n", " sequence number of a volume, page, line\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "punc\n", "
\n", "
str
\n", "\n", " punctuation and/or space immediately after a word\n", "\n", "
\n", "\n", "
\n", "
\n", "side\n", "
\n", "
str
\n", "\n", " whether the page is a left or right page\n", "\n", "
\n", "\n", "
\n", "
\n", "year\n", "
\n", "
int
\n", "\n", " year covered by a page\n", "\n", "
\n", "\n", "
\n", "
\n", "years\n", "
\n", "
str
\n", "\n", " years covered by the volume\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: CLARIAH/wp6-daghregisters
  3. appPath: /Users/me/github/CLARIAH/wp6-daghregisters/app
  4. commit: no value
  5. css: ''
  6. docs:
    • docPage: transcription
    • featureBase:https://github.com/{org}/{repo}/blob/master/docs/transcription{docExt}
    • featurePage: ''
  7. interfaceDefaults: {}
  8. isCompatible: True
  9. local: clone
  10. localDir: /Users/me/github/CLARIAH/wp6-daghregisters/_temp
  11. provenanceSpec:
    • corpus: Dagh Registers Dutch East India Company 1640-1641
    • org: CLARIAH
    • relative: /tf
    • repo: wp6-daghregisters
    • version: 0.1
  12. release: no value
  13. typeDisplay: {}
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Data to be zipped:\n", "\tOK app (v0.2 786263) : ~/github/CLARIAH/wp6-daghregisters/app\n", "\tOK main data (v0.2 786263) : ~/github/CLARIAH/wp6-daghregisters/tf/0.1\n", "Writing zip file ...\n", " 0.22s ~/Downloads/github/CLARIAH/wp6-daghregisters/complete.zip\n", "=== Dhammapada ===>\n" ] }, { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "app: ~/github/ETCBC/dhammapada/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/ETCBC/dhammapada/tf/0.2" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " TF: TF API 12.5.4, ETCBC/dhammapada/app v3, Search Reference
\n", " Data: ETCBC - dhammapada 0.2, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
vagga26497.00100
stanza47527.20100
sentence91314.15100
clause23285.55100
word129221.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
Dhammapada-Latine\n", "
\n", "\n", "
\n", "
\n", "clarity\n", "
\n", "
int
\n", "\n", " word is inserted for clarity, marked by inclusion in ( and ); only in Latin translation\n", "\n", "
\n", "\n", "
\n", "
\n", "extrastanza\n", "
\n", "
int
\n", "\n", " word is outside a stanza, between stanzas or in pre/post vagga material\n", "\n", "
\n", "\n", "
\n", "
\n", "freq_occ\n", "
\n", "
int
\n", "\n", " the number of times that this word occurs\n", "\n", "
\n", "\n", "
\n", "
\n", "latin\n", "
\n", "
str
\n", "\n", " bare word (without non-word-letters)\n", "\n", "
\n", "\n", "
\n", "
\n", "latinpost\n", "
\n", "
str
\n", "\n", " non-word letters after word, with trailing spaces\n", "\n", "
\n", "\n", "
\n", "
\n", "latinpre\n", "
\n", "
str
\n", "\n", " non-word letters before word, no leading spaces\n", "\n", "
\n", "\n", "
\n", "
\n", "n\n", "
\n", "
int
\n", "\n", " number of vagga, stanza (relative to work), sentence, clause (both relative to vagga)\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "pali\n", "
\n", "
str
\n", "\n", " bare word (without non-word-letters)\n", "\n", "
\n", "\n", "
\n", "
\n", "palipost\n", "
\n", "
str
\n", "\n", " non-word letters after word, with trailing spaces\n", "\n", "
\n", "\n", "
\n", "
\n", "palipre\n", "
\n", "
str
\n", "\n", " non-word letters before word, no leading spaces\n", "\n", "
\n", "\n", "
\n", "
\n", "quote\n", "
\n", "
int
\n", "\n", " word is inside a quote\n", "\n", "
\n", "\n", "
\n", "
\n", "trans\n", "
\n", "
int
\n", "\n", " whether the node belongs to the original text or a translation\n", "\n", "
\n", "\n", "
\n", "
\n", "uncertain\n", "
\n", "
int
\n", "\n", " word is marked as uncertain by inclusion in [ and ]; only in Pali original\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: ETCBC/dhammapada
  3. appPath: /Users/me/github/ETCBC/dhammapada/app
  4. commit: no value
  5. css:.trans1 {
    color: #0000cc;
    }
    .extrastanza1 {
    font-weight: bold;
    }
    .clarity1 {
    font-family: monospace;
    color: #8888ff;
    }
    .uncertain1 {
    font-family: monospace;
    color: #888888;
    }
    .quote1 {
    font-style: italic;
    }
  6. dataDisplay:
    • exampleSectionHtml: <code>Vagga 1:1</code>
    • noneValues:
      • none
      • unknown
      • no value
      • NA
    • textFormats:
      • layout-latin-full: {method: layoutLatin}
      • layout-orig-full: {method: layoutOrig}
      • layout-pali-full: {method: layoutPali}
  7. docs:
    • docPage: about
    • featureBase: {docBase}/transcription.md
    • featurePage: transcription
  8. interfaceDefaults: {}
  9. isCompatible: True
  10. local: clone
  11. localDir: /Users/me/github/ETCBC/dhammapada/_temp
  12. provenanceSpec:
    • corpus: Dhammapada-Latine
    • doi: 10.5281/zenodo.1007624
    • org: ETCBC
    • relative: /tf
    • repo: dhammapada
    • version: 0.2
    • webBase: https://www.tipitaka.net/tipitaka/dhp
    • webHint:Show this stanza with English translation and comments on tipitaka
    • webUrl: {webBase}/verseload.php?verse=<2>
    • webUrlZeros: {2: 3}
  13. release: no value
  14. typeDisplay:
    • clause: {label: {n}}
    • sentence: {label: {n}}
    • word: {features: quote uncertain clarity}
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Data to be zipped:\n", "\tOK app (v0.2 0cad6b) : ~/github/ETCBC/dhammapada/app\n", "\tOK main data (v0.2 0cad6b) : ~/github/ETCBC/dhammapada/tf/0.2\n", "Writing zip file ...\n", " 0.07s ~/Downloads/github/ETCBC/dhammapada/complete.zip\n", "=== Peshitta ===>\n" ] }, { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "app: ~/github/ETCBC/peshitta/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/ETCBC/peshitta/tf/0.2" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " TF: TF API 12.5.4, ETCBC/peshitta/app v3, Search Reference
\n", " Data: ETCBC - peshitta 0.2, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
book656566.69100
chapter1269336.36100
verse3134113.62100
word4268351.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
Peshitta (Old Testament)\n", "
\n", "\n", "
\n", "
\n", "book\n", "
\n", "
str
\n", "\n", " book name\n", "\n", "
\n", "\n", "
\n", "
\n", "book@ll\n", "
\n", "
str
\n", "\n", " ?\n", "\n", "
\n", "\n", "
\n", "
\n", "chapter\n", "
\n", "
int
\n", "\n", " chapter number\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " ?\n", "\n", "
\n", "\n", "
\n", "
\n", "trailer\n", "
\n", "
str
\n", "\n", " after-word material in syriac script\n", "\n", "
\n", "\n", "
\n", "
\n", "trailer_etcbc\n", "
\n", "
str
\n", "\n", " after-word material in ETCBC transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "verse\n", "
\n", "
int
\n", "\n", " verse number\n", "\n", "
\n", "\n", "
\n", "
\n", "witness\n", "
\n", "
str
\n", "\n", " book witness (A or B)\n", "\n", "
\n", "\n", "
\n", "
\n", "word\n", "
\n", "
str
\n", "\n", " full form of the word in syriac script\n", "\n", "
\n", "\n", "
\n", "
\n", "word_etcbc\n", "
\n", "
str
\n", "\n", " full form of the word in ETCBC transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " ?\n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: ETCBC/peshitta
  3. appPath: /Users/me/github/ETCBC/peshitta/app
  4. commit: no value
  5. css: ''
  6. dataDisplay:
  7. \n", " noneValues:\n", "
    • none
    • unknown
    • no value
    • NA
    \n", "
  8. docs:
    • docPage: transcription
    • featureBase: {docBase}/transcription-{version}{docExt}#<feature>
    • featurePage: ''
  9. interfaceDefaults: {}
  10. isCompatible: True
  11. local: clone
  12. localDir: /Users/me/github/ETCBC/peshitta/_temp
  13. provenanceSpec:
    • corpus: Peshitta (Old Testament)
    • doi: 10.5281/zenodo.1463675
    • org: ETCBC
    • relative: /tf
    • repo: peshitta
    • version: 0.2
    • webBase: {urlGh}/{org}/{repo}/blob/master/source
    • webHint: Show this document in the Peshitta repository
    • webLang: en
    • webUrl: {webBase}/{version}/<1>
  14. release: no value
  15. typeDisplay: {}
  16. writing: syc
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Data to be zipped:\n", "\tOK app (v0.5 0b440a) : ~/github/ETCBC/peshitta/app\n", "\tOK main data (v0.5 0b440a) : ~/github/ETCBC/peshitta/tf/0.2\n", "Writing zip file ...\n", " 0.45s ~/Downloads/github/ETCBC/peshitta/complete.zip\n", "=== SyrNT ===>\n" ] }, { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "app: ~/github/ETCBC/syrnt/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/ETCBC/syrnt/tf/0.1" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " TF: TF API 12.5.4, ETCBC/syrnt/app v3, Search Reference
\n", " Data: ETCBC - syrnt 0.1, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
book274060.74100
chapter260421.69100
lexeme303836.09100
verse795713.78100
word1096401.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
SyrNT\n", "
\n", "\n", "
\n", "
\n", "book\n", "
\n", "
str
\n", "\n", " book name\n", "\n", "
\n", "\n", "
\n", "
\n", "book@ll\n", "
\n", "
str
\n", "\n", " ?\n", "\n", "
\n", "\n", "
\n", "
\n", "chapter\n", "
\n", "
int
\n", "\n", " chapter number\n", "\n", "
\n", "\n", "
\n", "
\n", "demcat\n", "
\n", "
str
\n", "\n", " demonstrative category\n", "\n", "
\n", "\n", "
\n", "
\n", "fmhdot\n", "
\n", "
int
\n", "\n", " feminine he dot\n", "\n", "
\n", "\n", "
\n", "
\n", "gn\n", "
\n", "
str
\n", "\n", " gender\n", "\n", "
\n", "\n", "
\n", "
\n", "lexeme\n", "
\n", "
str
\n", "\n", " lexeme of the word in syriac script\n", "\n", "
\n", "\n", "
\n", "
\n", "lexeme_etcbc\n", "
\n", "
str
\n", "\n", " lexeme of the word in ETCBC/Wit transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "lexeme_sedra\n", "
\n", "
str
\n", "\n", " lexeme of the word in SEDRA transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "nmtyp\n", "
\n", "
str
\n", "\n", " numeral type\n", "\n", "
\n", "\n", "
\n", "
\n", "ntyp\n", "
\n", "
str
\n", "\n", " noun type\n", "\n", "
\n", "\n", "
\n", "
\n", "nu\n", "
\n", "
str
\n", "\n", " number\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " ?\n", "\n", "
\n", "\n", "
\n", "
\n", "prefix\n", "
\n", "
str
\n", "\n", " prefix of the word in syriac script\n", "\n", "
\n", "\n", "
\n", "
\n", "prefix_etcbc\n", "
\n", "
str
\n", "\n", " prefix of the word in ETCBC/Wit transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "prefix_sedra\n", "
\n", "
str
\n", "\n", " prefix of the word in SEDRA transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "prtyp\n", "
\n", "
str
\n", "\n", " pronoun_type\n", "\n", "
\n", "\n", "
\n", "
\n", "ps\n", "
\n", "
str
\n", "\n", " person\n", "\n", "
\n", "\n", "
\n", "
\n", "ptctyp\n", "
\n", "
str
\n", "\n", " participle type\n", "\n", "
\n", "\n", "
\n", "
\n", "root\n", "
\n", "
str
\n", "\n", " root of the word in syriac script\n", "\n", "
\n", "\n", "
\n", "
\n", "root_etcbc\n", "
\n", "
str
\n", "\n", " root of the word in ETCBC/Wit transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "root_sedra\n", "
\n", "
str
\n", "\n", " root of the word in SEDRA transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "seyame\n", "
\n", "
int
\n", "\n", " seyame\n", "\n", "
\n", "\n", "
\n", "
\n", "sfcontract\n", "
\n", "
str
\n", "\n", " suffix contraction\n", "\n", "
\n", "\n", "
\n", "
\n", "sfgn\n", "
\n", "
str
\n", "\n", " suffix gender\n", "\n", "
\n", "\n", "
\n", "
\n", "sfnu\n", "
\n", "
str
\n", "\n", " suffix number\n", "\n", "
\n", "\n", "
\n", "
\n", "sfps\n", "
\n", "
str
\n", "\n", " suffix person\n", "\n", "
\n", "\n", "
\n", "
\n", "sp\n", "
\n", "
str
\n", "\n", " part of speech (grammatical category)\n", "\n", "
\n", "\n", "
\n", "
\n", "st\n", "
\n", "
str
\n", "\n", " state\n", "\n", "
\n", "\n", "
\n", "
\n", "stem\n", "
\n", "
str
\n", "\n", " stem of the word in syriac script\n", "\n", "
\n", "\n", "
\n", "
\n", "stem_etcbc\n", "
\n", "
str
\n", "\n", " stem of the word in ETCBC/Wit transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "stem_sedra\n", "
\n", "
str
\n", "\n", " stem of the word in SEDRA transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "suffix\n", "
\n", "
str
\n", "\n", " suffix of the word in syriac script\n", "\n", "
\n", "\n", "
\n", "
\n", "suffix_etcbc\n", "
\n", "
str
\n", "\n", " suffix of the word in ETCBC/Wit transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "suffix_sedra\n", "
\n", "
str
\n", "\n", " suffix of the word in SEDRA transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "verse\n", "
\n", "
int
\n", "\n", " verse number\n", "\n", "
\n", "\n", "
\n", "
\n", "vs\n", "
\n", "
str
\n", "\n", " verbal conjugation\n", "\n", "
\n", "\n", "
\n", "
\n", "vt\n", "
\n", "
str
\n", "\n", " verbal aspect (tense)\n", "\n", "
\n", "\n", "
\n", "
\n", "word\n", "
\n", "
str
\n", "\n", " full form of the word in syriac script\n", "\n", "
\n", "\n", "
\n", "
\n", "word_etcbc\n", "
\n", "
str
\n", "\n", " full form of the word in ETCBC/Wit transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "word_sedra\n", "
\n", "
str
\n", "\n", " full form of the word in SEDRA transcription\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " ?\n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: ETCBC/syrnt
  3. appPath: /Users/me/github/ETCBC/syrnt/app
  4. commit: no value
  5. css: ''
  6. dataDisplay:
  7. \n", " noneValues:\n", "
    • none
    • unknown
    • no value
    • NA
    \n", "
  8. docs:
    • docPage: transcription
    • featureBase: {docBase}/transcription-{version}{docExt}
    • featurePage: ''
  9. interfaceDefaults: {}
  10. isCompatible: True
  11. local: clone
  12. localDir: /Users/me/github/ETCBC/syrnt/_temp
  13. provenanceSpec:
    • corpus: SyrNT
    • doi: 10.5281/zenodo.1464787
    • org: ETCBC
    • relative: /tf
    • repo: syrnt
    • version: 0.1
    • webBase: {urlGh}/{org}/{repo}/blob/master/plain
    • webHint: show this passage in the SyrNT repository
    • webLang: en
    • webUrl: {webBase}/{version}/<1>.txt
  14. release: no value
  15. typeDisplay:
    • lexeme:
      • label: {lexeme}
      • lexOcc: word
      • template: {lexeme}
    • word:
      • features: vs vt
      • featuresBare: sp
  16. writing: syc
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Data to be zipped:\n", "\tOK app (0.3 8175ce) : ~/github/ETCBC/syrnt/app\n", "\tOK main data (0.3 8175ce) : ~/github/ETCBC/syrnt/tf/0.1\n", "Writing zip file ...\n", " 0.64s ~/Downloads/github/ETCBC/syrnt/complete.zip\n", "=== NinMed ===>\n" ] }, { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "app: ~/github/Nino-cunei/ninmed/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/Nino-cunei/ninmed/tf/0.3" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " TF: TF API 12.5.4, Nino-cunei/ninmed/app v3, Search Reference
\n", " Data: Nino-cunei - ninmed 0.3, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
document341553.79100
face54978.31100
line308217.14100
cluster63962.6232
word308161.71100
sign528291.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
Nineveh Medical Encyclopedia 800 BCE: Cuneiform tablets\n", "
\n", "\n", "
\n", "
\n", "after\n", "
\n", "
str
\n", "\n", " what comes after a sign or word (- or space)\n", "\n", "
\n", "\n", "
\n", "
\n", "atf\n", "
\n", "
str
\n", "\n", " full atf of a sign\n", "\n", "
\n", "\n", "
\n", "
\n", "atfpost\n", "
\n", "
str
\n", "\n", " cluster characters that follow a sign or word\n", "\n", "
\n", "\n", "
\n", "
\n", "atfpre\n", "
\n", "
str
\n", "\n", " cluster characters that precede a sign or word\n", "\n", "
\n", "\n", "
\n", "
\n", "col\n", "
\n", "
int
\n", "\n", " ATF column number\n", "\n", "
\n", "\n", "
\n", "
\n", "collated\n", "
\n", "
int
\n", "\n", " whether a sign is collated (*)\n", "\n", "
\n", "\n", "
\n", "
\n", "collection\n", "
\n", "
str
\n", "\n", " collection name from metadata field \"collection\"\n", "\n", "
\n", "\n", "
\n", "
\n", "colofon\n", "
\n", "
str
\n", "\n", " colofon comment to a line\n", "\n", "
\n", "\n", "
\n", "
\n", "comment\n", "
\n", "
str
\n", "\n", " comment to a line\n", "\n", "
\n", "\n", "
\n", "
\n", "damage\n", "
\n", "
int
\n", "\n", " whether a sign is damaged\n", "\n", "
\n", "\n", "
\n", "
\n", "description\n", "
\n", "
str
\n", "\n", " description from metadata field \"description\"\n", "\n", "
\n", "\n", "
\n", "
\n", "det\n", "
\n", "
int
\n", "\n", " whether a sign is a determinative gloss - between { }\n", "\n", "
\n", "\n", "
\n", "
\n", "docnumber\n", "
\n", "
str
\n", "\n", " document number from metadata field \"number\"\n", "\n", "
\n", "\n", "
\n", "
\n", "erasure\n", "
\n", "
int
\n", "\n", " whether a sign is in an erasure - between ° \\ °: 1: between ° and \\; 2: between \\ and °\n", "\n", "
\n", "\n", "
\n", "
\n", "excised\n", "
\n", "
int
\n", "\n", " whether a sign is excised - between << >>\n", "\n", "
\n", "\n", "
\n", "
\n", "face\n", "
\n", "
str
\n", "\n", " full name of a face including the enclosing object\n", "\n", "
\n", "\n", "
\n", "
\n", "flags\n", "
\n", "
str
\n", "\n", " sequence of flags after a sign\n", "\n", "
\n", "\n", "
\n", "
\n", "gloss\n", "
\n", "
int
\n", "\n", " whether a sign belongs to a gloss - between {( )}\n", "\n", "
\n", "\n", "
\n", "
\n", "grapheme\n", "
\n", "
str
\n", "\n", " grapheme of a sign\n", "\n", "
\n", "\n", "
\n", "
\n", "lang\n", "
\n", "
str
\n", "\n", " language of a document, word, or sign: absent: Akkadian; sux: Sumerian; sb: Standard Babylonian\n", "\n", "
\n", "\n", "
\n", "
\n", "lemma\n", "
\n", "
str
\n", "\n", " lemma of a word:comma-separated values of the uniqueLemma field in the JSON source\n", "\n", "
\n", "\n", "
\n", "
\n", "lln\n", "
\n", "
int
\n", "\n", " logical line number of a numbered line\n", "\n", "
\n", "\n", "
\n", "
\n", "ln\n", "
\n", "
int
\n", "\n", " ATF line number of a numbered line, without prime\n", "\n", "
\n", "\n", "
\n", "
\n", "lnno\n", "
\n", "
str
\n", "\n", " ATF line number, may be $ or #, with prime; column number prepended\n", "\n", "
\n", "\n", "
\n", "
\n", "missing\n", "
\n", "
int
\n", "\n", " whether a sign is missing - between [ ]\n", "\n", "
\n", "\n", "
\n", "
\n", "modifiers\n", "
\n", "
str
\n", "\n", " sequence of modifiers after a sign\n", "\n", "
\n", "\n", "
\n", "
\n", "museum\n", "
\n", "
str
\n", "\n", " museum name from metadata field \"museum.name\"\n", "\n", "
\n", "\n", "
\n", "
\n", "note\n", "
\n", "
str
\n", "\n", " note comment to a line\n", "\n", "
\n", "\n", "
\n", "
\n", "number\n", "
\n", "
int
\n", "\n", " numeric value of a number sign\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "pnumber\n", "
\n", "
str
\n", "\n", " P number of a document\n", "\n", "
\n", "\n", "
\n", "
\n", "primecol\n", "
\n", "
int
\n", "\n", " whether a prime is present on a column number\n", "\n", "
\n", "\n", "
\n", "
\n", "primeln\n", "
\n", "
int
\n", "\n", " whether a prime is present on a line number\n", "\n", "
\n", "\n", "
\n", "
\n", "publication\n", "
\n", "
str
\n", "\n", " publication info from metadata field \"publication\"\n", "\n", "
\n", "\n", "
\n", "
\n", "question\n", "
\n", "
int
\n", "\n", " whether a sign has the question flag (?)\n", "\n", "
\n", "\n", "
\n", "
\n", "reading\n", "
\n", "
str
\n", "\n", " reading of a sign\n", "\n", "
\n", "\n", "
\n", "
\n", "remarkable\n", "
\n", "
int
\n", "\n", " whether a sign is remarkable (!)\n", "\n", "
\n", "\n", "
\n", "
\n", "ruling\n", "
\n", "
str
\n", "\n", " ruling comment to a line\n", "\n", "
\n", "\n", "
\n", "
\n", "seal\n", "
\n", "
str
\n", "\n", " seal comment to a line\n", "\n", "
\n", "\n", "
\n", "
\n", "supplied\n", "
\n", "
int
\n", "\n", " whether a sign is supplied - between < >\n", "\n", "
\n", "\n", "
\n", "
\n", "sym\n", "
\n", "
str
\n", "\n", " essential part of a sign or of a word\n", "\n", "
\n", "\n", "
\n", "
\n", "tr@ll\n", "
\n", "
str
\n", "\n", " english translation of a line\n", "\n", "
\n", "\n", "
\n", "
\n", "trans\n", "
\n", "
int
\n", "\n", " whether a line has a translation\n", "\n", "
\n", "\n", "
\n", "
\n", "type\n", "
\n", "
str
\n", "\n", " name of a type of cluster or kind of sign\n", "\n", "
\n", "\n", "
\n", "
\n", "uncertain\n", "
\n", "
int
\n", "\n", " whether a sign is uncertain - between ( )\n", "\n", "
\n", "\n", "
\n", "
\n", "variant\n", "
\n", "
int
\n", "\n", " if sign is part of a variant pair, this is the sequence number of the variant (1 or 2)\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: Nino-cunei/ninmed
  3. appPath: /Users/me/github/Nino-cunei/ninmed/app
  4. commit: no value
  5. css:.pnum {
    font-family: sans-serif;
    font-size: small;
    font-weight: bold;
    color: #444444;
    }
    .period {
    font-family: monospace;
    font-size: medium;
    font-weight: bold;
    color: #0000bb;
    }
    /* LANGUAGE: superscript and subscript */

    /* cluster */
    .det {
    vertical-align: super;
    }
    /* cluster */
    .lang {
    vertical-align: sub;
    }
    /* REDACTIONAL: line over or under */

    /* flag */
    .collated {
    font-weight: bold;
    text-decoration: underline;
    }
    /* cluster */
    .excised {
    color: #dd0000;
    text-decoration: line-through;
    }
    /* cluster */
    .supplied {
    color: #0000ff;
    text-decoration: overline;
    }
    /* flag */
    .remarkable {
    font-weight: bold;
    text-decoration: overline;
    }

    /* UNSURE: italic*/

    /* cluster */
    .uncertain {
    font-style: italic
    }
    /* flag */
    .question {
    font-weight: bold;
    font-style: italic
    }

    /* BROKEN: text-shadow */

    /* cluster */
    .missing {
    color: #999999;
    text-shadow: #bbbbbb 1px 1px;
    }
    /* flag */
    .damage {
    font-weight: bold;
    color: #999999;
    text-shadow: #bbbbbb 1px 1px;
    }
    .empty {
    color: #ff0000;
    }

  6. dataDisplay:
    • showVerseInTuple: True
    • textFormats:
      • layout-orig-full:
        • method: layoutFull
        • style: trans
      • layout-orig-plain:
        • method: layoutPlain
        • style: trans
      • text-orig-full: {style: trans}
      • text-orig-plain: {style: trans}
  7. docs:
    • charText: mapping from readings to UNICODE
    • charUrl:https://nbviewer.jupyter.org/github/Nino-cunei/tfFromAtf/blob/master/programs/mapReadings.ipynb
    • docPage: about
    • featureBase:https://github.com/Nino-cunei/ninmed/blob/master/docs/transcription{docExt}
    • featurePage: ''
  8. isCompatible: True
  9. local: clone
  10. localDir: /Users/me/github/Nino-cunei/ninmed/_temp
  11. provenanceSpec:
    • corpus: Nineveh Medical Encyclopedia 800 BCE: Cuneiform tablets
    • doi: 10.5281/zenodo.2579207
    • org: Nino-cunei
    • relative: /tf
    • repo: ninmed
    • version: 0.3
    • webBase: https://cdli.ucla.edu
    • webHint: Show this document on CDLI
    • webUrl:{webBase}/search/search_results.php?SearchMode=Text&ObjectID=<1>
  12. release: no value
  13. typeDisplay:
    • cluster:
      • label: {type}
      • stretch: 0
    • document: {featuresBare: docnumber}
    • face: {featuresBare: face}
    • line: {features: seal ruling note comment tr@en}
    • sign:
    • \n", " features:\n", " collated remarkable question damage det uncertain missing excised supplied lang\n", "
    • word:
      • base: True
      • label: True
      • wrap: 0
  14. writing: akk
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Data to be zipped:\n", "\tOK app (v0.3 49eee1) : ~/github/Nino-cunei/ninmed/app\n", "\tOK main data (v0.3 49eee1) : ~/github/Nino-cunei/ninmed/tf/0.3\n", "Writing zip file ...\n", " 0.17s ~/Downloads/github/Nino-cunei/ninmed/complete.zip\n", "=== Athenaeus ===>\n" ] }, { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "app: ~/github/pthu/athenaeus/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/pthu/athenaeus/tf/1.1" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " TF: TF API 12.5.4, pthu/athenaeus/app v3, Search Reference
\n", " Data: pthu - athenaeus 1.1, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
_book1265146.00100
head1265146.00100
book1517676.40100
hi783114.9492
cit1671586.02100
num275949.7799
add789335.94100
chapter1328199.66100
pb1549171.18100
p1571168.78100
quote312684.74100
bibl526451.09101
l1126723.50100
_sentence1477317.95100
word2651461.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
The Deipnosophistae by Athenaeus\n", "
\n", "\n", "
\n", "
\n", "_book\n", "
\n", "
str
\n", "\n", " the title of the book\n", "\n", "
\n", "\n", "
\n", "
\n", "_sentence\n", "
\n", "
int
\n", "\n", " numbering of sentences with \".\" as its delimiter\n", "\n", "
\n", "\n", "
\n", "
\n", "add\n", "
\n", "
int
\n", "\n", " open tag without further specification. See the name of the .tf-file for it's meaning\n", "\n", "
\n", "\n", "
\n", "
\n", "beta_plain\n", "
\n", "
str
\n", "\n", " the plain form of the text in betacode stripped of all accents and punctuation\n", "\n", "
\n", "\n", "
\n", "
\n", "bibl\n", "
\n", "
str
\n", "\n", " open tag without further specification. See the name of the .tf-file for it's meaning\n", "\n", "
\n", "\n", "
\n", "
\n", "book\n", "
\n", "
int
\n", "\n", " first section level\n", "\n", "
\n", "\n", "
\n", "
\n", "chapter\n", "
\n", "
int
\n", "\n", " second section level\n", "\n", "
\n", "\n", "
\n", "
\n", "cit\n", "
\n", "
int
\n", "\n", " open tag without further specification. See the name of the .tf-file for it's meaning\n", "\n", "
\n", "\n", "
\n", "
\n", "head\n", "
\n", "
str
\n", "\n", " open tag without further specification. See the name of the .tf-file for it's meaning\n", "\n", "
\n", "\n", "
\n", "
\n", "hi\n", "
\n", "
str
\n", "\n", " not provided\n", "\n", "
\n", "\n", "
\n", "
\n", "l\n", "
\n", "
int
\n", "\n", " open tag without further specification. See the name of the .tf-file for it's meaning\n", "\n", "
\n", "\n", "
\n", "
\n", "lemma\n", "
\n", "
str
\n", "\n", " the lemmatized form of the text tries to return as much as possible the words as a comma-separated list of possible lemmata. If no lemma could be found, the word is preceded by a \"*\". The lemmata have been defined by using the normalized text\n", "\n", "
\n", "\n", "
\n", "
\n", "main\n", "
\n", "
str
\n", "\n", " the original form of the text in unicode (UFD norm), but extensively normalized (no punctuation and other trailing characters, no elision, normalization of accents.\n", "\n", "
\n", "\n", "
\n", "
\n", "norm\n", "
\n", "
str
\n", "\n", " a normalized form of uni_main, according to the normalization of James Tauber: https://github.com/jtauber/greek-normalisation\n", "\n", "
\n", "\n", "
\n", "
\n", "num\n", "
\n", "
int
\n", "\n", " open tag without further specification. See the name of the .tf-file for it's meaning\n", "\n", "
\n", "\n", "
\n", "
\n", "orig\n", "
\n", "
str
\n", "\n", " the original form of the text in unicode (UFD norm), including accents and punctuation; if the original text was in betacode, it has been converted to unicode without any normalization\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "p\n", "
\n", "
int
\n", "\n", " open tag without further specification. See the name of the .tf-file for it's meaning\n", "\n", "
\n", "\n", "
\n", "
\n", "pb\n", "
\n", "
str
\n", "\n", " not given\n", "\n", "
\n", "\n", "
\n", "
\n", "plain\n", "
\n", "
str
\n", "\n", " the plain form of the text in unicode stripped of all accents and punctuation\n", "\n", "
\n", "\n", "
\n", "
\n", "post\n", "
\n", "
str
\n", "\n", " post gives non-letter characters at the end of a word\n", "\n", "
\n", "\n", "
\n", "
\n", "pre\n", "
\n", "
str
\n", "\n", " pre gives non-letter characters at the start of a word\n", "\n", "
\n", "\n", "
\n", "
\n", "quote\n", "
\n", "
str
\n", "\n", " not provided\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: pthu/athenaeus
  3. appPath: /Users/me/github/pthu/athenaeus/app
  4. commit: no value
  5. css: ''
  6. dataDisplay: {}
  7. docs: {}
  8. interfaceDefaults: {}
  9. isCompatible: True
  10. local: clone
  11. localDir: /Users/me/github/pthu/athenaeus/_temp
  12. provenanceSpec:
    • corpus: The Deipnosophistae by Athenaeus
    • org: pthu
    • relative: /tf
    • repo: athenaeus
    • version: 1.1
  13. release: no value
  14. typeDisplay:
    • p: {label: {p}}
    • word: {featuresBare: beta_plain}
  15. writing: grc
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Data to be zipped:\n", "\tOK app (v1.1 880dd3) : ~/github/pthu/athenaeus/app\n", "\tOK main data (v1.1 880dd3) : ~/github/pthu/athenaeus/tf/1.1\n", "Writing zip file ...\n", " 1.21s ~/Downloads/github/pthu/athenaeus/complete.zip\n", "=== Fusus ===>\n" ] }, { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "app: ~/github/among/fusus/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/github/among/fusus/tf/0.7" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " TF: TF API 12.5.4, among/fusus/app v3, Search Reference
\n", " Data: among - fusus 0.7, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
piece291413.21100
page403101.69100
sentence244116.79100
line43699.38100
column44599.19100
span44599.19100
word409831.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
Fusus Al Hikam, by Ibn Arabi, merged editions Lakhnawi-Afifi\n", "
\n", "\n", "
\n", "
\n", "combine_af\n", "
\n", "
int
\n", "\n", " number of consecutive words in the Afifi textthat form an alignment entry with 1 or more words in the Lakhnawi text\n", "\n", "
\n", "\n", "
\n", "
\n", "combine_lk\n", "
\n", "
int
\n", "\n", " number of consecutive words in the Lakhnawi textthat form an alignment entry with 1 or more words in the Afifi text\n", "\n", "
\n", "\n", "
\n", "
\n", "dir\n", "
\n", "
str
\n", "\n", " writing direction of a span\n", "\n", "
\n", "\n", "
\n", "
\n", "editdistance\n", "
\n", "
int
\n", "\n", " edit distance between the Lakhnawi part in an alignment entryand its Afifi counterpart\n", "\n", "
\n", "\n", "
\n", "
\n", "fass\n", "
\n", "
int
\n", "\n", " number of the piece (bezel) that the word belongs to\n", "\n", "
\n", "\n", "
\n", "
\n", "letters\n", "
\n", "
str
\n", "\n", " text string of a word without punctuation\n", "\n", "
\n", "\n", "
\n", "
\n", "letters_af\n", "
\n", "
str
\n", "\n", " text string of a word without punctuation (Afifi edition)\n", "\n", "
\n", "\n", "
\n", "
\n", "lettersn\n", "
\n", "
str
\n", "\n", " text string of a word in latin transcription (beta code)\n", "\n", "
\n", "\n", "
\n", "
\n", "lettersn_af\n", "
\n", "
str
\n", "\n", " text string of a word in latin transcription (beta code) (Afifi edition)\n", "\n", "
\n", "\n", "
\n", "
\n", "lettersp\n", "
\n", "
str
\n", "\n", " text string of a word in ascii transcription (beta code)\n", "\n", "
\n", "\n", "
\n", "
\n", "lettersp_af\n", "
\n", "
str
\n", "\n", " text string of a word in ascii transcription (beta code) (Afifi edition)\n", "\n", "
\n", "\n", "
\n", "
\n", "letterst\n", "
\n", "
str
\n", "\n", " text string of a word in romanized transcription (Library of Congress)\n", "\n", "
\n", "\n", "
\n", "
\n", "letterst_af\n", "
\n", "
str
\n", "\n", " text string of a word in romanized transcription (Library of Congress) (Afifi edition)\n", "\n", "
\n", "\n", "
\n", "
\n", "line_af\n", "
\n", "
int
\n", "\n", " line number in the raw fususa dataset, which is obtained from ocr-ing the Afifi page images\n", "\n", "
\n", "\n", "
\n", "
\n", "ln\n", "
\n", "
int
\n", "\n", " sequence number of a line within a page\n", "\n", "
\n", "\n", "
\n", "
\n", "lwcvl\n", "
\n", "
str
\n", "\n", " personal notes by Cornelis van Lit\n", "\n", "
\n", "\n", "
\n", "
\n", "n\n", "
\n", "
int
\n", "\n", " sequence number of a piece, page, column within a line, or span\n", "\n", "
\n", "\n", "
\n", "
\n", "np\n", "
\n", "
int
\n", "\n", " sequence number of a proper content piece\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "page_af\n", "
\n", "
int
\n", "\n", " page number in the raw fususa dataset, which is obtained from ocr-ing the Afifi page images\n", "\n", "
\n", "\n", "
\n", "
\n", "poetrymeter\n", "
\n", "
str
\n", "\n", " meter in which this verse is written\n", "\n", "
\n", "\n", "
\n", "
\n", "poetryverse\n", "
\n", "
int
\n", "\n", " word is start of a verse of poetry, value is the number of the verse\n", "\n", "
\n", "\n", "
\n", "
\n", "punc\n", "
\n", "
str
\n", "\n", " punctuation and/or space immediately after a word\n", "\n", "
\n", "\n", "
\n", "
\n", "punc_af\n", "
\n", "
str
\n", "\n", " punctuation and/or space immediately after a word (Afifi edition)\n", "\n", "
\n", "\n", "
\n", "
\n", "punca\n", "
\n", "
str
\n", "\n", " punctuation and/or space immediately after a word\n", "\n", "
\n", "\n", "
\n", "
\n", "punca_af\n", "
\n", "
str
\n", "\n", " punctuation and/or space immediately after a word (Afifi edition)\n", "\n", "
\n", "\n", "
\n", "
\n", "puncb\n", "
\n", "
str
\n", "\n", " punctuation immediately before a word\n", "\n", "
\n", "\n", "
\n", "
\n", "puncba\n", "
\n", "
str
\n", "\n", " punctuation immediately before a word\n", "\n", "
\n", "\n", "
\n", "
\n", "qunawims\n", "
\n", "
str
\n", "\n", " on which folio of the oldest manuscript, penned by Qunawi himself, is this word attested?\n", "\n", "
\n", "\n", "
\n", "
\n", "quran\n", "
\n", "
str
\n", "\n", " word is part of a quran citation (sura:aya)\n", "\n", "
\n", "\n", "
\n", "
\n", "ratio\n", "
\n", "
int
\n", "\n", " ratio (=similarity) between the Lakhnawi part in an alignment entryand its Afifi counterpart\n", "\n", "
\n", "\n", "
\n", "
\n", "raw\n", "
\n", "
str
\n", "\n", " letters of the word straight from the pdf\n", "\n", "
\n", "\n", "
\n", "
\n", "slot_af\n", "
\n", "
int
\n", "\n", " slot number in the raw fususa dataset, which is obtained from ocr-ing the Afifi page images\n", "\n", "
\n", "\n", "
\n", "
\n", "slot_lk\n", "
\n", "
int
\n", "\n", " slot number in the raw fususl dataset, which is obtained from reverse-engineering the Lakhnawi pdf\n", "\n", "
\n", "\n", "
\n", "
\n", "title\n", "
\n", "
str
\n", "\n", " title of a piece\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: among/fusus
  3. appPath: /Users/me/github/among/fusus/app
  4. commit: no value
  5. css: ''
  6. dataDisplay:
  7. \n", " noneValues:\n", "
    • none
    • unknown
    • no value
    • NA
    \n", "
  8. docs:
    • docBase: https://{org}.github.io/{repo}/fusus
    • docExt: .html
    • docPage: align
    • featureBase: {docBase}/about/transcription
    • featurePage: ''
  9. interfaceDefaults: {}
  10. isCompatible: True
  11. local: clone
  12. localDir: /Users/me/github/among/fusus/_temp
  13. provenanceSpec:
    • corpus:Fusus Al Hikam, by Ibn Arabi, merged editions Lakhnawi-Afifi
    • doi: 10.5281/zenodo.xx1464787
    • org: among
    • relative: /tf
    • repo: fusus
    • version: 0.7
    • webBase:https://{org}.github.io/{repo}/fusus/assets/lakhnawi-with-toc
    • webHint:show this passage in the the original (html derived from pdf)
    • webLang: en
    • webUrl: {webBase}.html#p<2>
    • webUrlZeros: {2: 3}
  14. release: no value
  15. typeDisplay:
  16. \n", " word:\n", "
  17. \n", " features:\n", " lettersn lettersp letterst letters_af lettersn_af lettersp_af letterst_af\n", "
  18. \n", "
  19. writing: ara
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Data to be zipped:\n", "\tOK app (v0.8 56174d) : ~/github/among/fusus/app\n", "\tOK main data (v0.8 56174d) : ~/github/among/fusus/tf/0.7\n", "Writing zip file ...\n", " 0.31s ~/Downloads/github/among/fusus/complete.zip\n" ] } ], "source": [ "for (corpus, (backend, org, repo)) in corpora.items():\n", " print(f\"=== {corpus} ===>\")\n", " collect(backend, org, repo)" ] }, { "cell_type": "code", "execution_count": null, "id": "65ad72fe-9bc9-4c13-aedb-9186e93d3cac", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 5 }