{ "cells": [ { "cell_type": "markdown", "id": "ce9571d4-211a-482e-bba4-eb8d5a2f86eb", "metadata": {}, "source": [ "# Value frequencies for selected features" ] }, { "cell_type": "code", "execution_count": 1, "id": "81014f3a-a50a-4576-b2e8-7386ad137c93", "metadata": {}, "outputs": [], "source": [ "# Loading the Text-Fabric code\n", "# Note: it is assumed Text-Fabric is installed in your environment\n", "from tf.fabric import Fabric\n", "from tf.app import use" ] }, { "cell_type": "code", "execution_count": 2, "id": "28f5d706-4a62-4164-a766-f4ecc494da92", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "**Locating corpus resources ...**" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "The requested app is not available offline\n", "\t~/text-fabric-data/github/tonyjurg/Nestle1904LFT/app not found\n" ] }, { "data": { "text/html": [ "Status: latest release online v0.6 versus None locally" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "downloading app, main data and requested additions ..." ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "app: ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/app" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "The requested data is not available offline\n", "\t~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6 not found\n" ] }, { "data": { "text/html": [ "Status: latest release online v0.6 versus None locally" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "downloading app, main data and requested additions ..." 
], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "data: ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ " | 0.21s T otype from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 2.27s T oslots from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.46s T verse from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.49s T after from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.56s T book from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.49s T chapter from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.61s T wordtranslit from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.63s T normalized from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.60s T wordunacc from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.60s T word from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.61s T unicode from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | | 0.06s C __levels__ from otype, oslots, otext\n", " | | 1.79s C __order__ from otype, oslots, __levels__\n", " | | 0.07s C __rank__ from otype, __order__\n", " | | 3.31s C __levUp__ from otype, oslots, __rank__\n", " | | 1.92s C __levDown__ from otype, __levUp__, __rank__\n", " | | 0.22s C __characters__ from otext\n", " | | 0.92s C __boundary__ from otype, oslots, __rank__\n", " | | 0.04s C __sections__ from otype, oslots, otext, __levUp__, __levels__, book, chapter, verse\n", " | | 0.23s C __structure__ from otype, oslots, otext, __rank__, __levUp__, book, chapter, verse\n", " | 0.43s T booknumber from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.49s T bookshort from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 
0.47s T case from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.32s T clausetype from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.55s T containedclause from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.41s T degree from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.56s T gloss from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.46s T gn from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.04s T headverse from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.32s T junction from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.57s T lemma from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.52s T lex_dom from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.55s T ln from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.41s T markafter from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.41s T markbefore from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.41s T markorder from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.44s T monad from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.44s T mood from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.52s T morph from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.53s T nodeID from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.48s T nu from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.48s T number from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.43s T person from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.42s T punctuation from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.64s T ref from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.65s T reference from 
~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.49s T roleclausedistance from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.47s T sentence from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.51s T sp from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.51s T sp_full from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.54s T strongs from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.45s T subj_ref from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.44s T tense from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.45s T type from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.44s T voice from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.38s T wgclass from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.33s T wglevel from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.35s T wgnum from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.34s T wgrole from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.35s T wgrolelong from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.38s T wgrule from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.33s T wgtype from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.50s T wordlevel from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.50s T wordrole from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n", " | 0.52s T wordrolelong from ~/text-fabric-data/github/tonyjurg/Nestle1904LFT/tf/0.6\n" ] }, { "data": { "text/html": [ "\n", " TF: TF API 12.1.5, tonyjurg/Nestle1904LFT/app v3, Search Reference
\n", " Data: tonyjurg - Nestle1904LFT 0.6, Character table, Feature docs
\n", "
Node types\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "
Name# of nodes# slots / node% coverage
book275102.93100
chapter260529.92100
verse794317.35100
sentence801117.20100
wg1054306.85524
word1377791.00100
\n", " Sets: no custom sets
\n", " Features:
\n", "
Nestle 1904 (Low Fat Tree)\n", "
\n", "\n", "
\n", "
\n", "after\n", "
\n", "
str
\n", "\n", " ✅ Characters (eg. punctuations) following the word\n", "\n", "
\n", "\n", "
\n", "
\n", "book\n", "
\n", "
str
\n", "\n", " ✅ Book name (in English language)\n", "\n", "
\n", "\n", "
\n", "
\n", "booknumber\n", "
\n", "
int
\n", "\n", " ✅ NT book number (Matthew=1, Mark=2, ..., Revelation=27)\n", "\n", "
\n", "\n", "
\n", "
\n", "bookshort\n", "
\n", "
str
\n", "\n", " ✅ Book name (abbreviated)\n", "\n", "
\n", "\n", "
\n", "
\n", "case\n", "
\n", "
str
\n", "\n", " ✅ Gramatical case (Nominative, Genitive, Dative, Accusative, Vocative)\n", "\n", "
\n", "\n", "
\n", "
\n", "chapter\n", "
\n", "
int
\n", "\n", " ✅ Chapter number inside book\n", "\n", "
\n", "\n", "
\n", "
\n", "clausetype\n", "
\n", "
str
\n", "\n", " ✅ Clause type details (e.g. Verbless, Minor)\n", "\n", "
\n", "\n", "
\n", "
\n", "containedclause\n", "
\n", "
str
\n", "\n", " 🆗 Contained clause (WG number)\n", "\n", "
\n", "\n", "
\n", "
\n", "degree\n", "
\n", "
str
\n", "\n", " ✅ Degree (e.g. Comparitative, Superlative)\n", "\n", "
\n", "\n", "
\n", "
\n", "gloss\n", "
\n", "
str
\n", "\n", " ✅ English gloss\n", "\n", "
\n", "\n", "
\n", "
\n", "gn\n", "
\n", "
str
\n", "\n", " ✅ Gramatical gender (Masculine, Feminine, Neuter)\n", "\n", "
\n", "\n", "
\n", "
\n", "headverse\n", "
\n", "
str
\n", "\n", " ✅ Start verse number of a sentence\n", "\n", "
\n", "\n", "
\n", "
\n", "junction\n", "
\n", "
str
\n", "\n", " ✅ Junction data related to a wordgroup\n", "\n", "
\n", "\n", "
\n", "
\n", "lemma\n", "
\n", "
str
\n", "\n", " ✅ Lexeme (lemma)\n", "\n", "
\n", "\n", "
\n", "
\n", "lex_dom\n", "
\n", "
str
\n", "\n", " ✅ Lexical domain according to Semantic Dictionary of Biblical Greek, SDBG (not present everywhere?)\n", "\n", "
\n", "\n", "
\n", "
\n", "ln\n", "
\n", "
str
\n", "\n", " ✅ Lauw-Nida lexical classification (not present everywhere?)\n", "\n", "
\n", "\n", "
\n", "
\n", "markafter\n", "
\n", "
str
\n", "\n", " 🆗 Text critical marker after word\n", "\n", "
\n", "\n", "
\n", "
\n", "markbefore\n", "
\n", "
str
\n", "\n", " 🆗 Text critical marker before word\n", "\n", "
\n", "\n", "
\n", "
\n", "markorder\n", "
\n", "
str
\n", "\n", "  Order of punctuation and text critical marker\n", "\n", "
\n", "\n", "
\n", "
\n", "monad\n", "
\n", "
int
\n", "\n", " ✅ Monad (smallest token matching word order in the corpus)\n", "\n", "
\n", "\n", "
\n", "
\n", "mood\n", "
\n", "
str
\n", "\n", " ✅ Gramatical mood of the verb (passive, etc)\n", "\n", "
\n", "\n", "
\n", "
\n", "morph\n", "
\n", "
str
\n", "\n", " ✅ Morphological tag (Sandborg-Petersen morphology)\n", "\n", "
\n", "\n", "
\n", "
\n", "nodeID\n", "
\n", "
str
\n", "\n", " ✅ Node ID (as in the XML source data)\n", "\n", "
\n", "\n", "
\n", "
\n", "normalized\n", "
\n", "
str
\n", "\n", " ✅ Surface word with accents normalized and trailing punctuations removed\n", "\n", "
\n", "\n", "
\n", "
\n", "nu\n", "
\n", "
str
\n", "\n", " ✅ Gramatical number (Singular, Plural)\n", "\n", "
\n", "\n", "
\n", "
\n", "number\n", "
\n", "
str
\n", "\n", " ✅ Gramatical number of the verb (e.g. singular, plural)\n", "\n", "
\n", "\n", "
\n", "
\n", "otype\n", "
\n", "
str
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "person\n", "
\n", "
str
\n", "\n", " ✅ Gramatical person of the verb (first, second, third)\n", "\n", "
\n", "\n", "
\n", "
\n", "punctuation\n", "
\n", "
str
\n", "\n", " ✅ Punctuation after word\n", "\n", "
\n", "\n", "
\n", "
\n", "ref\n", "
\n", "
str
\n", "\n", " ✅ Value of the ref ID (taken from XML sourcedata)\n", "\n", "
\n", "\n", "
\n", "
\n", "reference\n", "
\n", "
str
\n", "\n", " ✅ Reference (to nodeID in XML source data, not yet post-processes)\n", "\n", "
\n", "\n", "
\n", "
\n", "roleclausedistance\n", "
\n", "
str
\n", "\n", " ⚠️ Distance to the wordgroup defining the syntactical role of this word\n", "\n", "
\n", "\n", "
\n", "
\n", "sentence\n", "
\n", "
int
\n", "\n", " ✅ Sentence number (counted per chapter)\n", "\n", "
\n", "\n", "
\n", "
\n", "sp\n", "
\n", "
str
\n", "\n", " ✅ Part of Speech (abbreviated)\n", "\n", "
\n", "\n", "
\n", "
\n", "sp_full\n", "
\n", "
str
\n", "\n", " ✅ Part of Speech (long description)\n", "\n", "
\n", "\n", "
\n", "
\n", "strongs\n", "
\n", "
str
\n", "\n", " ✅ Strongs number\n", "\n", "
\n", "\n", "
\n", "
\n", "subj_ref\n", "
\n", "
str
\n", "\n", " 🆗 Subject reference (to nodeID in XML source data, not yet post-processes)\n", "\n", "
\n", "\n", "
\n", "
\n", "tense\n", "
\n", "
str
\n", "\n", " ✅ Gramatical tense of the verb (e.g. Present, Aorist)\n", "\n", "
\n", "\n", "
\n", "
\n", "type\n", "
\n", "
str
\n", "\n", " ✅ Gramatical type of noun or pronoun (e.g. Common, Personal)\n", "\n", "
\n", "\n", "
\n", "
\n", "unicode\n", "
\n", "
str
\n", "\n", " ✅ Word as it apears in the text in Unicode (incl. punctuations)\n", "\n", "
\n", "\n", "
\n", "
\n", "verse\n", "
\n", "
int
\n", "\n", " ✅ Verse number inside chapter\n", "\n", "
\n", "\n", "
\n", "
\n", "voice\n", "
\n", "
str
\n", "\n", " ✅ Gramatical voice of the verb (e.g. active,passive)\n", "\n", "
\n", "\n", "
\n", "
\n", "wgclass\n", "
\n", "
str
\n", "\n", " ✅ Class of the wordgroup (e.g. cl, np, vp)\n", "\n", "
\n", "\n", "
\n", "
\n", "wglevel\n", "
\n", "
int
\n", "\n", " 🆗 Number of the parent wordgroups for a wordgroup\n", "\n", "
\n", "\n", "
\n", "
\n", "wgnum\n", "
\n", "
int
\n", "\n", " ✅ Wordgroup number (counted per book)\n", "\n", "
\n", "\n", "
\n", "
\n", "wgrole\n", "
\n", "
str
\n", "\n", " ✅ Syntactical role of the wordgroup (abbreviated)\n", "\n", "
\n", "\n", "
\n", "
\n", "wgrolelong\n", "
\n", "
str
\n", "\n", " ✅ Syntactical role of the wordgroup (full)\n", "\n", "
\n", "\n", "
\n", "
\n", "wgrule\n", "
\n", "
str
\n", "\n", " ✅ Wordgroup rule information (e.g. Np-Appos, ClCl2, PrepNp)\n", "\n", "
\n", "\n", "
\n", "
\n", "wgtype\n", "
\n", "
str
\n", "\n", " ✅ Wordgroup type details (e.g. group, apposition)\n", "\n", "
\n", "\n", "
\n", "
\n", "word\n", "
\n", "
str
\n", "\n", " ✅ Word as it appears in the text (excl. punctuations)\n", "\n", "
\n", "\n", "
\n", "
\n", "wordlevel\n", "
\n", "
str
\n", "\n", " 🆗 Number of the parent wordgroups for a word\n", "\n", "
\n", "\n", "
\n", "
\n", "wordrole\n", "
\n", "
str
\n", "\n", " ✅ Syntactical role of the word (abbreviated)\n", "\n", "
\n", "\n", "
\n", "
\n", "wordrolelong\n", "
\n", "
str
\n", "\n", " ✅ Syntactical role of the word (full)\n", "\n", "
\n", "\n", "
\n", "
\n", "wordtranslit\n", "
\n", "
str
\n", "\n", " 🆗 Transliteration of the text (in latin letters, excl. punctuations)\n", "\n", "
\n", "\n", "
\n", "
\n", "wordunacc\n", "
\n", "
str
\n", "\n", " ✅ Word without accents (excl. punctuations)\n", "\n", "
\n", "\n", "
\n", "
\n", "oslots\n", "
\n", "
none
\n", "\n", " \n", "\n", "
\n", "\n", "
\n", "
\n", "\n", " Settings:
specified
  1. apiVersion: 3
  2. appName: tonyjurg/Nestle1904LFT
  3. appPath:C:/Users/tonyj/text-fabric-data/github/tonyjurg/Nestle1904LFT/app
  4. commit: no value
  5. css: ''
  6. dataDisplay:
    • excludedFeatures:
      • orig_order
      • verse
      • book
      • chapter
    • noneValues:
      • none
      • unknown
      • no value
      • NA
      • ''
    • showVerseInTuple: 0
    • textFormat: text-orig-full
  7. docs:
    • docBase: https://github.com/tonyjurg/Nestle1904LFT/blob/main/docs/
    • docPage: about
    • docRoot: https://github.com/tonyjurg/Nestle1904LFT
    • featureBase:https://github.com/tonyjurg/Nestle1904LFT/blob/main/docs/features/<feature>.md
  8. interfaceDefaults: {fmt: layout-orig-full}
  9. isCompatible: True
  10. local: no value
  11. localDir:C:/Users/tonyj/text-fabric-data/github/tonyjurg/Nestle1904LFT/_temp
  12. provenanceSpec:
    • corpus: Nestle 1904 (Low Fat Tree)
    • doi: notyet
    • org: tonyjurg
    • relative: /tf
    • repo: Nestle1904LFT
    • repro: Nestle1904LFT
    • version: 0.6
    • webBase: https://learner.bible/text/show_text/nestle1904/
    • webHint: Show this on the Bible Online Learner website
    • webLang: en
    • webUrl:https://learner.bible/text/show_text/nestle1904/<1>/<2>/<3>
    • webUrlLex: {webBase}/word?version={version}&id=<lid>
  13. release: no value
  14. typeDisplay:
    • book:
      • condense: True
      • hidden: True
      • label: {book}
      • style: ''
    • chapter:
      • condense: True
      • hidden: True
      • label: {chapter}
      • style: ''
    • sentence:
      • hidden: 0
      • label: #{sentence} (start: {book} {chapter}:{headverse})
      • style: ''
    • verse:
      • condense: True
      • excludedFeatures: chapter verse
      • label: {book} {chapter}:{verse}
      • style: ''
    • wg:
      • hidden: 0
      • label:#{wgnum}: {wgtype} {wgclass} {clausetype} {wgrole} {wgrule} {junction}
      • style: ''
    • word:
      • base: True
      • features: lemma
      • featuresBare: gloss
      • surpress: chapter verse
  15. writing: grc
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
TF API: names N F E L T S C TF Fs Fall Es Eall Cs Call directly usable

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Load the N1904 Text-Fabric app together with its data (release 0.6).\n", "# hoist=globals() makes the TF API names (N, F, E, L, T, S, ...) directly usable in this notebook.\n", "N1904 = use(\n", "    \"tonyjurg/Nestle1904LFT\",\n", "    version=\"0.6\",\n", "    hoist=globals(),\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "id": "fd900844-9d58-4954-929e-a93fbf1c8ff9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Feature: bookshort \n", "\n", "\t value\t frequency\n", "\t Luke \t 19457\n", "\t Acts \t 18394\n", "\t Matt \t 18300\n", "\t John \t 15644\n", "\t Mark \t 11278\n", "\t Rev \t 9833\n", "\t Rom \t 7101\n", "\t 1Cor \t 6821\n", "\t Heb \t 4956\n", "\t 2Cor \t 4470\n", "\t Eph \t 2420\n", "\t Gal \t 2229\n", "\t 1John \t 2137\n", "\t Jas \t 1740\n", "\t 1Pet \t 1677\n", "\t Phil \t 1631\n", "\t 1Tim \t 1589\n", "\t Col \t 1576\n", "\t 1Thess \t 1474\n", "\t 2Tim \t 1238\n", "\t 2Pet \t 1099\n", "\t 2Thess \t 823\n", "\t Titus \t 659\n", "\t Jude \t 458\n", "\t Phlm \t 336\n", "\t 2John \t 246\n", "\t 3John \t 220\n", "\n", "\n", "Feature: wgrole \n", "\n", "\t value\t frequency\n", "\t \t 69235\n", "\t adv \t 16710\n", "\t o \t 9329\n", "\t s \t 6710\n", "\t p \t 1770\n", "\t io \t 702\n", "\t v \t 405\n", "\t aux \t 360\n", "\t o2 \t 171\n", "\t topic \t 25\n", "\t vc \t 10\n", "\t ellipsis \t 2\n", "\t tail \t 1\n", "\n", "\n", "Feature: wgtype \n", "\n", "\t value\t frequency\n", "\t \t 92932\n", "\t group \t 9699\n", "\t apposition \t 2799\n", "\n", "\n", "Feature: wgrole \n", "\n", "\t value\t frequency\n", "\t \t 69235\n", "\t adv \t 16710\n", "\t o \t 9329\n", "\t s \t 6710\n", "\t p \t 1770\n", "\t io \t 702\n", "\t v \t 405\n", "\t aux \t 360\n", "\t o2 \t 171\n", "\t topic \t 25\n", "\t vc \t 10\n", "\t ellipsis \t 2\n", "\t tail \t 1\n", "\n", "\n", "Feature: degree \n", "\n", "\t value\t frequency\n", "\t \t 137266\n", "\t comparative \t 313\n", "\t superlative \t 200\n", "\n", "\n", "Feature: gn \n", "\n", "\t value\t frequency\n", "\t \t 63804\n", "\t masculine \t 41486\n", 
"\t feminine \t 18736\n", "\t neuter \t 13753\n", "\n", "\n", "Feature: junction \n", "\n", "\t value\t frequency\n", "\t \t 103128\n", "\t apposition \t 2302\n", "\n", "\n", "Feature: mood \n", "\n", "\t value\t frequency\n", "\t \t 109422\n", "\t indicative \t 15617\n", "\t participle \t 6653\n", "\t infinitive \t 2285\n", "\t imperative \t 1877\n", "\t subjunctive \t 1856\n", "\t optative \t 69\n", "\n", "\n", "Feature: nu \n", "\n", "\t value\t frequency\n", "\t singular \t 69846\n", "\t \t 38842\n", "\t plural \t 29091\n", "\n", "\n", "Feature: person \n", "\n", "\t value\t frequency\n", "\t \t 118360\n", "\t third \t 12747\n", "\t second \t 3729\n", "\t first \t 2943\n", "\n", "\n", "Feature: wgrule \n", "\n", "\t value\t frequency\n", "\t DetNP \t 15696\n", "\t \t 14701\n", "\t PrepNp \t 11044\n", "\t NPofNP \t 6819\n", "\t Conj-CL \t 5571\n", "\t CLaCL \t 3668\n", "\t sub-CL \t 3114\n", "\t V2CL \t 2753\n", "\t V-O \t 2660\n", "\t DetCL \t 2011\n", "\t Np-Appos \t 1908\n", "\t V-ADV \t 1642\n", "\t NpAdjp \t 1371\n", "\t AdjpNp \t 1368\n", "\t NpaNp \t 1366\n", "\t DetAdj \t 1282\n", "\t ADV-V \t 1073\n", "\t O-V \t 1050\n", "\t that-VP \t 903\n", "\t NP-CL \t 874\n", "\t All-NP \t 846\n", "\t Np2CL \t 781\n", "\t ClCl \t 707\n", "\t NpPp \t 676\n", "\t ADV-V-O \t 625\n", "\t S-V \t 618\n", "\t AdvpNp \t 616\n", "\t ofNPNP \t 607\n", "\t V-S \t 587\n", "\t V-O-ADV \t 540\n", "\t PtclCL \t 538\n", "\t S-V-O \t 483\n", "\t S-V-ADV \t 450\n", "\t ADV-V-ADV \t 426\n", "\t NP-Demo \t 412\n", "\t S-VC-P \t 391\n", "\t ClCl2 \t 355\n", "\t S-ADV-V \t 351\n", "\t V-S-ADV \t 301\n", "\t S-P \t 292\n", "\t P-VC \t 280\n", "\t V-ADV-ADV \t 278\n", "\t O-V-ADV \t 264\n", "\t VC-P \t 260\n", "\t NPDetAdj \t 236\n", "\t S-P-VC \t 231\n", "\t NumpNP \t 219\n", "\t ADV-V-S \t 214\n", "\t V-IO \t 211\n", "\t P-S \t 199\n", "\t S-O-V \t 194\n", "\t notCLbutCL2CL \t 191\n", "\t P-VC-S \t 187\n", "\t Conj3Np \t 180\n", "\t S-ADV-V-O \t 180\n", "\t Conj2VP \t 176\n", "\t BeVerb 
\t 175\n", "\t Conj3CL \t 170\n", "\t S2CL \t 170\n", "\t O-ADV-V \t 164\n", "\t Demo-NP \t 162\n", "\t ADV2CL \t 160\n", "\t S-V-O-ADV \t 159\n", "\t ADV-ADV-V \t 156\n", "\t P2CL \t 151\n", "\t ADV-S-V \t 148\n", "\t S-ADV-V-ADV \t 147\n", "\t ADV-O-V \t 145\n", "\t PronNP \t 144\n", "\t V-S-O \t 141\n", "\t aCLaCL \t 141\n", "\t Conj2Pp \t 137\n", "\t O2CL \t 137\n", "\t V-IO-O \t 132\n", "\t V-ADV-O \t 129\n", "\t 2Advp_h1 \t 123\n", "\t AdjpaAdjp \t 121\n", "\t PpNp2Np \t 121\n", "\t ADV-V-O-ADV \t 114\n", "\t S-V-ADV-ADV \t 113\n", "\t AdvPp \t 110\n", "\t aNpaNp \t 108\n", "\t V-ADV-S \t 106\n", "\t NpNump \t 101\n", "\t V-O-S \t 101\n", "\t S-ADV \t 96\n", "\t NP-all \t 95\n", "\t VC-S-P \t 90\n", "\t O-V-S \t 88\n", "\t O-V-IO \t 87\n", "\t ADV-S-V-ADV \t 83\n", "\t ADV-V-ADV-ADV \t 83\n", "\t O-S-V \t 80\n", "\t ADV-V-S-ADV \t 77\n", "\t V-O-ADV-ADV \t 76\n", "\t ADV-ADV-V-ADV \t 75\n", "\t ADV-ADV-V-O \t 75\n", "\t ADV-S \t 74\n", "\t AdjpAdvp \t 74\n", "\t V-O-IO \t 65\n", "\t ADV-ADV \t 64\n", "\t ADV-S-V-O \t 64\n", "\t DetNump \t 63\n", "\t P-VC-ADV \t 58\n", "\t ADV-O \t 55\n", "\t ADV-P \t 55\n", "\t S-P-ADV \t 55\n", "\t 2Pp \t 54\n", "\t AdvpCL \t 54\n", "\t S-P-VC-ADV \t 53\n", "\t V-S-ADV-ADV \t 53\n", "\t VC-P-ADV \t 52\n", "\t ADV-V-S-O \t 49\n", "\t Intj2CL \t 49\n", "\t V-O-O2 \t 49\n", "\t notNPbutNP \t 49\n", "\t Conj4Np \t 48\n", "\t IO-V \t 48\n", "\t 2Np \t 46\n", "\t NpAdvp \t 46\n", "\t S-ADV-VC-P \t 46\n", "\t ADV-VC-P \t 45\n", "\t O-V-ADV-ADV \t 45\n", "\t S-O-V-ADV \t 44\n", "\t AdvpAdjp \t 43\n", "\t S-ADV-ADV-V \t 43\n", "\t VC-P-S \t 43\n", "\t ClClCl \t 42\n", "\t V-IO-ADV \t 42\n", "\t V-IO-S \t 42\n", "\t P-ADV \t 41\n", "\t V-ADV-ADV-ADV \t 40\n", "\t ADV-V-IO \t 39\n", "\t O-ADV \t 39\n", "\t ADV-P-VC \t 38\n", "\t S-ADV-V-O-ADV \t 38\n", "\t S-O-ADV-V \t 38\n", "\t S-VC-P-ADV \t 38\n", "\t P-S-VC \t 37\n", "\t S-IO \t 36\n", "\t S-V-IO \t 36\n", "\t S-ADV-V-ADV-ADV \t 35\n", "\t Conj4CL \t 34\n", "\t V-ADV-S-ADV \t 
33\n", "\t ADV-S-ADV-V \t 32\n", "\t VerbBe \t 32\n", "\t S-ADV-P \t 31\n", "\t ADV-V-ADV-S \t 30\n", "\t notPPbutPP \t 30\n", "\t ADV-V-IO-O \t 29\n", "\t S-ADV-O-V \t 29\n", "\t ADV-O-V-ADV \t 28\n", "\t O-V-S-ADV \t 27\n", "\t CLandCL2 \t 26\n", "\t O-ADV-V-ADV \t 26\n", "\t S-O \t 26\n", "\t S-V-IO-O \t 26\n", "\t O-IO-V \t 25\n", "\t V-S-O-ADV \t 25\n", "\t ADV-S-V-ADV-ADV \t 24\n", "\t P-ADV-VC \t 24\n", "\t ADV-V-ADV-O \t 23\n", "\t S-VC-ADV-P \t 23\n", "\t ADV-S-V-O-ADV \t 22\n", "\t DetAdv \t 22\n", "\t IO-V-O \t 22\n", "\t IO2CL \t 22\n", "\t ADV-ADV-ADV-V \t 21\n", "\t ADV-VC-S-P \t 21\n", "\t Conj3Pp \t 21\n", "\t S-ADV-ADV-V-O \t 21\n", "\t ADV-ADV-V-S \t 20\n", "\t O-S-ADV-V \t 20\n", "\t S-VC \t 20\n", "\t V-O-S-ADV \t 20\n", "\t VC-S-P-ADV \t 20\n", "\t 2Advp_h2 \t 19\n", "\t ADV-ADV-O-V \t 19\n", "\t ADV-S-ADV-V-ADV \t 19\n", "\t ADV-S-ADV-V-O \t 19\n", "\t IO-O \t 19\n", "\t O-V-IO-ADV \t 19\n", "\t O2-V-O \t 19\n", "\t ADV-ADV-V-O-ADV \t 18\n", "\t ADV-S-P \t 18\n", "\t ADV-S-P-VC \t 18\n", "\t NpNpNp \t 18\n", "\t O-S-V-IO \t 18\n", "\t V-ADV-O-ADV \t 18\n", "\t ADV-V-IO-ADV \t 17\n", "\t ADV-V-O-S \t 17\n", "\t ClClClCl \t 17\n", "\t O-S-V-ADV \t 17\n", "\t S-P-ADV-VC \t 17\n", "\t S-V-ADV-O \t 17\n", "\t ADV-ADV-V-ADV-ADV \t 16\n", "\t ADV-V-O-IO \t 16\n", "\t NpPron \t 16\n", "\t P-S-ADV \t 16\n", "\t ADV-ADV-S-V \t 15\n", "\t ADV-V-S-O-ADV \t 15\n", "\t AdjpPp \t 15\n", "\t CL-NP \t 15\n", "\t NpNpNpNp \t 15\n", "\t V-S-ADV-ADV-ADV \t 15\n", "\t intjNP \t 15\n", "\t ADV-P-S \t 14\n", "\t ADV-V-ADV-ADV-ADV \t 14\n", "\t ADV-V-S-ADV-ADV \t 14\n", "\t AdjpAdjp \t 14\n", "\t EitherOrVp \t 14\n", "\t S-ADV-ADV-V-ADV \t 14\n", "\t S-ADV-P-VC \t 14\n", "\t V-IO-S-ADV \t 14\n", "\t ADV-O-ADV-V \t 13\n", "\t ADV-VC-P-S \t 13\n", "\t Conj5Np \t 13\n", "\t IO-S-ADV \t 13\n", "\t NP-Prep \t 13\n", "\t O-ADV-ADV-V \t 13\n", "\t O-V-IO-S \t 13\n", "\t S-V-ADV-ADV-ADV \t 13\n", "\t VC-ADV-P \t 13\n", "\t aPpaPp \t 13\n", "\t ADV-P-VC-S \t 12\n", "\t 
NumpNump \t 12\n", "\t P-VC-S-ADV \t 12\n", "\t S-V-IO-ADV \t 12\n", "\t V-S-ADV-O \t 12\n", "\t aCLaCLaCL \t 12\n", "\t ADV-ADV-ADV \t 11\n", "\t ADV-P-VC-ADV \t 11\n", "\t ADV-S-O-V \t 11\n", "\t ADV-V-O-ADV-ADV \t 11\n", "\t IO-O-V \t 11\n", "\t IO-V-S \t 11\n", "\t P-ADV-S \t 11\n", "\t V-IO-O-ADV \t 11\n", "\t V-O-IO-ADV \t 11\n", "\t ADV-ADV-ADV-V-O \t 10\n", "\t ADV-ADV-V-IO \t 10\n", "\t ADV-V-IO-S \t 10\n", "\t ADV-VC-P-ADV \t 10\n", "\t ADVaADV \t 10\n", "\t ClClClClCl \t 10\n", "\t O-O2-V \t 10\n", "\t S-IO-V \t 10\n", "\t S-P-ADV-ADV \t 10\n", "\t ADV-ADV-V-S-ADV \t 9\n", "\t ADV-V-O-S-ADV \t 9\n", "\t AdjpDative \t 9\n", "\t O-V-S-ADV-ADV \t 9\n", "\t O2-O-V \t 9\n", "\t P-VC-ADV-S \t 9\n", "\t S-V-O-ADV-ADV \t 9\n", "\t S-V-O-O2 \t 9\n", "\t V-IO-S-O \t 9\n", "\t aNpaNpaNp \t 9\n", "\t ADV-ADV-ADV-V-ADV \t 8\n", "\t ADV-P-ADV \t 8\n", "\t ADV-S-VC-P \t 8\n", "\t ADV-V-ADV-O-ADV \t 8\n", "\t Conj3Adjp \t 8\n", "\t Conj7Np \t 8\n", "\t IO-S-V \t 8\n", "\t NpNpNpNpNp \t 8\n", "\t O-ADV-V-S \t 8\n", "\t O-V-ADV-ADV-ADV \t 8\n", "\t O-V-O2 \t 8\n", "\t P-ADV-VC-S \t 8\n", "\t P-VC-ADV-ADV \t 8\n", "\t S-ADV-ADV \t 8\n", "\t S-IO-ADV \t 8\n", "\t S-V-O-IO \t 8\n", "\t ADV-ADV-P \t 7\n", "\t ADV-IO-V \t 7\n", "\t ADV-IO-V-O \t 7\n", "\t ADV-O-V-IO \t 7\n", "\t ADV-V-ADV-S-ADV \t 7\n", "\t ADV-V-O-O2 \t 7\n", "\t ADV-VC-S-P-ADV \t 7\n", "\t AdvAdv \t 7\n", "\t AdvpaAdvp \t 7\n", "\t Conj6Np \t 7\n", "\t O-ADV-V-IO \t 7\n", "\t O-IO \t 7\n", "\t S-ADV-ADV-ADV-V \t 7\n", "\t S-ADV-V-ADV-O \t 7\n", "\t S-ADV-V-IO \t 7\n", "\t V-ADV-ADV-S \t 7\n", "\t V-IO-ADV-O \t 7\n", "\t V-O-ADV-ADV-ADV \t 7\n", "\t 3Adjp \t 6\n", "\t ADV-ADV-VC-P \t 6\n", "\t ADV-O-V-ADV-ADV \t 6\n", "\t ADV-VC \t 6\n", "\t AdjpCL \t 6\n", "\t ClClClClClCl \t 6\n", "\t Conj4Pp \t 6\n", "\t Conj5CL \t 6\n", "\t Conj6CL \t 6\n", "\t EitherOr4Np \t 6\n", "\t IO-S \t 6\n", "\t NumpAdjp \t 6\n", "\t O-ADV-ADV \t 6\n", "\t O-ADV-S-V \t 6\n", "\t O-O2 \t 6\n", "\t O-V-ADV-S \t 6\n", "\t P-S-VC-ADV 
\t 6\n", "\t S-ADV-ADV-V-ADV-ADV \t 6\n", "\t S-ADV-ADV-V-O-ADV \t 6\n", "\t S-IO-V-ADV \t 6\n", "\t S-IO-V-O \t 6\n", "\t S-O-V-ADV-ADV \t 6\n", "\t S-O-V-O2 \t 6\n", "\t V-IO-ADV-ADV \t 6\n", "\t V-S-IO \t 6\n", "\t notADVbutADV \t 6\n", "\t notCLbutCL \t 6\n", "\t 2CLaCL \t 5\n", "\t ADV-ADV-P-VC \t 5\n", "\t ADV-ADV-S-V-ADV \t 5\n", "\t ADV-ADV-S-V-O \t 5\n", "\t ADV-ADV-V-S-O \t 5\n", "\t ADV-IO \t 5\n", "\t ADV-O-ADV \t 5\n", "\t ADV-O-V-O2 \t 5\n", "\t ADV-P-S-ADV \t 5\n", "\t ADV-S-ADV \t 5\n", "\t ADV-S-ADV-V-ADV-ADV \t 5\n", "\t ADV-V-IO-O-ADV \t 5\n", "\t ADV-V-O-ADV-ADV-ADV \t 5\n", "\t AdjpofNp \t 5\n", "\t Conj2Nump \t 5\n", "\t EitherOr4CL \t 5\n", "\t IO-ADV-V \t 5\n", "\t NumpNumpNump \t 5\n", "\t O-ADV-V-ADV-ADV \t 5\n", "\t O-V-S-IO \t 5\n", "\t S-ADV-ADV-O-V \t 5\n", "\t S-ADV-V-IO-O \t 5\n", "\t S-P-VC-ADV-ADV \t 5\n", "\t V-ADV-O-ADV-ADV \t 5\n", "\t V-O-O2-ADV \t 5\n", "\t V-O-S-O2 \t 5\n", "\t VC-P-ADV-S \t 5\n", "\t VC-S-ADV-P \t 5\n", "\t ADV-ADV-V-O-S \t 4\n", "\t ADV-ADV-VC-P-ADV \t 4\n", "\t ADV-O-V-S \t 4\n", "\t ADV-O-V-S-ADV \t 4\n", "\t ADV-S-ADV-V-ADV-O \t 4\n", "\t ADV-S-O-V-ADV \t 4\n", "\t ADV-S-P-VC-ADV \t 4\n", "\t ADV-S-V-IO-O \t 4\n", "\t AdjpAdjpAdjpAdjp \t 4\n", "\t Conj2Nump3 \t 4\n", "\t Conj2P \t 4\n", "\t Conj3VP \t 4\n", "\t DativeAdjp \t 4\n", "\t IO-S-V-O \t 4\n", "\t IO-V-S-ADV \t 4\n", "\t O-IO-ADV-V \t 4\n", "\t O-IO-V-ADV \t 4\n", "\t O-S-V-ADV-ADV \t 4\n", "\t O2-V-O-ADV \t 4\n", "\t P-ADV-ADV \t 4\n", "\t PpPpPp \t 4\n", "\t PpPpPpPp \t 4\n", "\t S-ADV-O-V-ADV \t 4\n", "\t S-ADV-P-VC-ADV \t 4\n", "\t S-ADV-V-IO-ADV \t 4\n", "\t S-ADV-V-O-IO \t 4\n", "\t S-ADV-VC-P-ADV \t 4\n", "\t S-O-V-IO \t 4\n", "\t S-V-IO-O-ADV \t 4\n", "\t V-ADV-IO \t 4\n", "\t V-O2-O \t 4\n", "\t VC-ADV-ADV-P \t 4\n", "\t VC-ADV-S-P \t 4\n", "\t VC-P-ADV-ADV \t 4\n", "\t aPpaPpaPp \t 4\n", "\t 4NpaNp \t 3\n", "\t ADV-ADV-ADV-O-V \t 3\n", "\t ADV-ADV-O-V-ADV \t 3\n", "\t ADV-ADV-S \t 3\n", "\t ADV-ADV-S-ADV-V-O \t 3\n", "\t 
ADV-ADV-V-ADV-O \t 3\n", "\t ADV-ADV-VC-P-S \t 3\n", "\t ADV-IO-O-V \t 3\n", "\t ADV-IO-S \t 3\n", "\t ADV-IO-V-ADV \t 3\n", "\t ADV-O-ADV-V-ADV \t 3\n", "\t ADV-O-IO-V \t 3\n", "\t ADV-O-S-V \t 3\n", "\t ADV-P-ADV-VC \t 3\n", "\t ADV-S-ADV-ADV-V \t 3\n", "\t ADV-S-ADV-O-V \t 3\n", "\t ADV-S-ADV-V-ADV-ADV-ADV \t 3\n", "\t ADV-S-IO \t 3\n", "\t ADV-S-V-ADV-ADV-ADV \t 3\n", "\t ADV-V-ADV-ADV-S \t 3\n", "\t ADV-V-ADV-IO \t 3\n", "\t ADV-V-O-S-ADV-ADV \t 3\n", "\t ADV-V-S-ADV-ADV-ADV \t 3\n", "\t AdjpAdjpAdjpAdjpAdjpAdjp \t 3\n", "\t ClClClClClClCl \t 3\n", "\t Conj7CL \t 3\n", "\t Conj8Np \t 3\n", "\t EitherOrAdjp \t 3\n", "\t IO-ADV-V-S \t 3\n", "\t IO-S-O-V \t 3\n", "\t IO-V-ADV \t 3\n", "\t NpNpNpNpNpNp \t 3\n", "\t O-ADV-ADV-V-ADV \t 3\n", "\t O-IO-V-S \t 3\n", "\t O-S-ADV-V-ADV \t 3\n", "\t O-V-IO-S-ADV \t 3\n", "\t O-V-S-O2 \t 3\n", "\t O2-O-V-ADV \t 3\n", "\t P-S-ADV-VC \t 3\n", "\t PP-Adjp \t 3\n", "\t S-ADV-O-V-IO \t 3\n", "\t S-ADV-V-ADV-ADV-ADV \t 3\n", "\t S-ADV-V-O-ADV-ADV \t 3\n", "\t S-ADV-V-O-O2 \t 3\n", "\t S-ADV-VC-ADV-P \t 3\n", "\t S-IO-O-V \t 3\n", "\t S-O-ADV-V-ADV \t 3\n", "\t S-O-IO-V \t 3\n", "\t S-V-ADV-O-ADV \t 3\n", "\t V-ADV-ADV-O \t 3\n", "\t V-ADV-ADV-S-ADV \t 3\n", "\t V-ADV-S-ADV-ADV \t 3\n", "\t V-ADV-S-O \t 3\n", "\t V-IO-O-ADV-ADV \t 3\n", "\t V-IO-O-S \t 3\n", "\t V-IO-S-O-ADV \t 3\n", "\t V-O-ADV-O2 \t 3\n", "\t V-S-O-ADV-ADV \t 3\n", "\t VC-S-P-ADV-ADV \t 3\n", "\t aAdvpaAdvp \t 3\n", "\t 12Np \t 2\n", "\t 2PpaPp \t 2\n", "\t 7Np \t 2\n", "\t ADV-ADV-ADV-S-V-ADV \t 2\n", "\t ADV-ADV-ADV-V-ADV-ADV \t 2\n", "\t ADV-ADV-IO-V \t 2\n", "\t ADV-ADV-S-P-VC \t 2\n", "\t ADV-ADV-S-VC-P \t 2\n", "\t ADV-ADV-V-ADV-S \t 2\n", "\t ADV-ADV-V-IO-O \t 2\n", "\t ADV-ADV-V-O-ADV-ADV \t 2\n", "\t ADV-ADV-V-O-IO \t 2\n", "\t ADV-ADV-VC-S-P \t 2\n", "\t ADV-IO-S-V \t 2\n", "\t ADV-IO-V-S-ADV \t 2\n", "\t ADV-P-ADV-S \t 2\n", "\t ADV-P-VC-ADV-S \t 2\n", "\t ADV-P-VC-S-ADV \t 2\n", "\t ADV-S-ADV-ADV-V-ADV \t 2\n", "\t ADV-S-ADV-ADV-V-ADV-ADV \t 2\n", 
"\t ADV-S-ADV-ADV-V-O \t 2\n", "\t ADV-S-ADV-ADV-V-O-ADV \t 2\n", "\t ADV-S-ADV-O-V-ADV \t 2\n", "\t ADV-S-ADV-V-O-ADV \t 2\n", "\t ADV-S-P-ADV-VC \t 2\n", "\t ADV-S-V-ADV-O \t 2\n", "\t ADV-S-V-IO-ADV \t 2\n", "\t ADV-S-V-O-ADV-ADV \t 2\n", "\t ADV-V-ADV-IO-O \t 2\n", "\t ADV-V-IO-ADV-O \t 2\n", "\t ADV-V-IO-S-ADV \t 2\n", "\t ADV-V-O-ADV-S \t 2\n", "\t ADV-V-O-O2-ADV \t 2\n", "\t ADV-V-S-IO-ADV \t 2\n", "\t ADV-VC-ADV-P \t 2\n", "\t ADV-VC-P-S-ADV \t 2\n", "\t AdjpAdjpAdjpAdjpAdjp \t 2\n", "\t AdjpAdjpAdjpAdjpAdjpAdjpAdjp \t 2\n", "\t AdjpAdvp2Advp \t 2\n", "\t CLa2CL \t 2\n", "\t ClClClClClClClClCl \t 2\n", "\t ClClClClClClClClClCl \t 2\n", "\t ClClClClClClClClClClClCl \t 2\n", "\t Conj2Nump2 \t 2\n", "\t Conj3Advp \t 2\n", "\t Conj5AdjP \t 2\n", "\t ConjNp \t 2\n", "\t EitherOr3Vp \t 2\n", "\t IO-ADV \t 2\n", "\t IO-ADV-V-ADV \t 2\n", "\t IO-V-ADV-ADV \t 2\n", "\t IO-V-O-ADV \t 2\n", "\t IO-V-S-O \t 2\n", "\t NpNpNpNpNpNpNpNp \t 2\n", "\t NumpNumpNump2 \t 2\n", "\t NumpNumpNump3 \t 2\n", "\t O-ADV-S-ADV-V \t 2\n", "\t O-ADV-V-O2 \t 2\n", "\t O-ADV-V-S-ADV \t 2\n", "\t O-S-IO-V \t 2\n", "\t O-S-V-IO-ADV \t 2\n", "\t O-V-ADV-ADV-ADV-ADV \t 2\n", "\t O2-S-V-O \t 2\n", "\t P-VC-S-ADV-ADV \t 2\n", "\t PpAdvp \t 2\n", "\t PpPpPpPpPp \t 2\n", "\t S-ADV-ADV-V-ADV-O \t 2\n", "\t S-ADV-IO-V-O \t 2\n", "\t S-ADV-O-ADV-V \t 2\n", "\t S-IO-ADV-V \t 2\n", "\t S-O-ADV \t 2\n", "\t S-O-ADV-V-IO \t 2\n", "\t S-P-ADV-VC-ADV \t 2\n", "\t S-V-ADV-ADV-ADV-ADV \t 2\n", "\t S-V-IO-ADV-ADV \t 2\n", "\t S-V-O-ADV-O2 \t 2\n", "\t S-V-O-O2-ADV-ADV \t 2\n", "\t S-VC-ADV-ADV-P-ADV \t 2\n", "\t V-ADV-ADV-ADV-ADV \t 2\n", "\t V-ADV-IO-ADV \t 2\n", "\t V-IO-ADV-S \t 2\n", "\t V-IO-ADV-S-ADV \t 2\n", "\t V-IO-O-O2 \t 2\n", "\t V-IO-S-ADV-ADV \t 2\n", "\t V-O-ADV-ADV-ADV-ADV \t 2\n", "\t V-O-IO-ADV-ADV \t 2\n", "\t V-O-IO-O2 \t 2\n", "\t V-O-S-ADV-ADV \t 2\n", "\t V-O-S-ADV-ADV-ADV \t 2\n", "\t V-S-ADV-ADV-IO \t 2\n", "\t V-S-O-IO \t 2\n", "\t VC-ADV-P-ADV \t 2\n", "\t VC-ADV-P-S-ADV \t 2\n", 
"\t VC-P-S-ADV \t 2\n", "\t VC2CL \t 2\n", "\t VpVp \t 2\n", "\t notVPbutVP \t 2\n", "\t 2CLaCLaCL \t 1\n", "\t 2NpaNpaNp \t 1\n", "\t 3NpaNp \t 1\n", "\t ADV-ADV-ADV-ADV-ADV-V \t 1\n", "\t ADV-ADV-ADV-ADV-V \t 1\n", "\t ADV-ADV-ADV-ADV-V-S-ADV-ADV \t 1\n", "\t ADV-ADV-ADV-S-ADV \t 1\n", "\t ADV-ADV-ADV-S-O-V \t 1\n", "\t ADV-ADV-ADV-S-V \t 1\n", "\t ADV-ADV-ADV-S-V-O \t 1\n", "\t ADV-ADV-ADV-V-ADV-ADV-ADV \t 1\n", "\t ADV-ADV-ADV-V-ADV-O \t 1\n", "\t ADV-ADV-ADV-V-O-ADV \t 1\n", "\t ADV-ADV-ADV-V-O-ADV-ADV-ADV \t 1\n", "\t ADV-ADV-ADV-V-S-ADV-ADV \t 1\n", "\t ADV-ADV-IO \t 1\n", "\t ADV-ADV-O \t 1\n", "\t ADV-ADV-O-S-V \t 1\n", "\t ADV-ADV-O-V-ADV-ADV \t 1\n", "\t ADV-ADV-O-V-S \t 1\n", "\t ADV-ADV-O-V-S-ADV \t 1\n", "\t ADV-ADV-O-V-S-IO \t 1\n", "\t ADV-ADV-P-ADV \t 1\n", "\t ADV-ADV-P-ADV-VC-ADV \t 1\n", "\t ADV-ADV-P-ADV-VC-S \t 1\n", "\t ADV-ADV-P-VC-S \t 1\n", "\t ADV-ADV-S-ADV-ADV-V \t 1\n", "\t ADV-ADV-S-ADV-V \t 1\n", "\t ADV-ADV-S-ADV-V-ADV \t 1\n", "\t ADV-ADV-S-O-V \t 1\n", "\t ADV-ADV-S-P \t 1\n", "\t ADV-ADV-S-V-O-ADV \t 1\n", "\t ADV-ADV-S-VC-P-ADV \t 1\n", "\t ADV-ADV-V-ADV-ADV-ADV \t 1\n", "\t ADV-ADV-V-IO-ADV \t 1\n", "\t ADV-ADV-V-IO-ADV-ADV \t 1\n", "\t ADV-ADV-V-IO-S \t 1\n", "\t ADV-ADV-V-IO-S-O-ADV \t 1\n", "\t ADV-ADV-V-S-ADV-ADV \t 1\n", "\t ADV-ADV-V-S-ADV-ADV-ADV \t 1\n", "\t ADV-ADV-V-S-ADV-O \t 1\n", "\t ADV-ADV-VC-P-ADV-S \t 1\n", "\t ADV-ADV-VC-P-S-ADV \t 1\n", "\t ADV-IO-ADV \t 1\n", "\t ADV-IO-ADV-S-ADV \t 1\n", "\t ADV-IO-ADV-V \t 1\n", "\t ADV-IO-V-O-ADV \t 1\n", "\t ADV-O-ADV-ADV-V \t 1\n", "\t ADV-O-ADV-V-O2 \t 1\n", "\t ADV-O-ADV-V-S-ADV \t 1\n", "\t ADV-O-O2-V \t 1\n", "\t ADV-O-O2-V-ADV \t 1\n", "\t ADV-O-S-V-O2 \t 1\n", "\t ADV-O-V-ADV-S \t 1\n", "\t ADV-O-V-IO-ADV \t 1\n", "\t ADV-O2-O-IO-V \t 1\n", "\t ADV-O2-O-V \t 1\n", "\t ADV-O2-O-V-ADV \t 1\n", "\t ADV-O2-V-O \t 1\n", "\t ADV-P-ADV-ADV \t 1\n", "\t ADV-P-ADV-VC-S \t 1\n", "\t ADV-P-S-VC \t 1\n", "\t ADV-P-VC-ADV-S-ADV \t 1\n", "\t ADV-S-ADV-ADV-ADV-V-O \t 1\n", "\t 
ADV-S-ADV-ADV-O-ADV-V \t 1\n", "\t ADV-S-ADV-ADV-V-ADV-ADV-ADV \t 1\n", "\t ADV-S-ADV-ADV-V-ADV-ADV-O \t 1\n", "\t ADV-S-ADV-ADV-V-O-IO-ADV \t 1\n", "\t ADV-S-ADV-P \t 1\n", "\t ADV-S-ADV-P-VC-ADV \t 1\n", "\t ADV-S-ADV-V-ADV-IO-ADV \t 1\n", "\t ADV-S-ADV-V-IO-ADV \t 1\n", "\t ADV-S-ADV-V-O-O2-ADV \t 1\n", "\t ADV-S-ADV-VC-P \t 1\n", "\t ADV-S-IO-ADV-ADV \t 1\n", "\t ADV-S-IO-V-O \t 1\n", "\t ADV-S-O-ADV \t 1\n", "\t ADV-S-O-ADV-V \t 1\n", "\t ADV-S-O-V-O2 \t 1\n", "\t ADV-S-P-ADV \t 1\n", "\t ADV-S-V-ADV-ADV-ADV-ADV \t 1\n", "\t ADV-S-V-IO \t 1\n", "\t ADV-S-V-IO-ADV-ADV \t 1\n", "\t ADV-S-V-O-IO-ADV \t 1\n", "\t ADV-S-V-O-O2 \t 1\n", "\t ADV-V-ADV-ADV-ADV-ADV \t 1\n", "\t ADV-V-ADV-ADV-O-ADV \t 1\n", "\t ADV-V-ADV-ADV-S-ADV \t 1\n", "\t ADV-V-ADV-O-S-ADV \t 1\n", "\t ADV-V-ADV-S-ADV-ADV \t 1\n", "\t ADV-V-IO-ADV-S \t 1\n", "\t ADV-V-IO-S-O \t 1\n", "\t ADV-V-O-ADV-S-IO-ADV \t 1\n", "\t ADV-V-O-IO-ADV \t 1\n", "\t ADV-V-S-ADV-ADV-ADV-ADV \t 1\n", "\t ADV-V-S-ADV-O \t 1\n", "\t ADV-V-S-ADV-O-ADV \t 1\n", "\t ADV-V-S-IO \t 1\n", "\t ADV-V-S-O-ADV-ADV \t 1\n", "\t ADV-VC-ADV-P-ADV \t 1\n", "\t ADV-VC-P-ADV-ADV \t 1\n", "\t ADV-VC-P-ADV-S \t 1\n", "\t ADV-VC-S-ADV-P \t 1\n", "\t AdjpAdjp2 \t 1\n", "\t AdvpAdvpAdvp \t 1\n", "\t AdvpNump \t 1\n", "\t CLandClClandClandClandCl \t 1\n", "\t ClClClClClClClCl \t 1\n", "\t Conj12CL \t 1\n", "\t Conj12Np \t 1\n", "\t Conj13CL \t 1\n", "\t Conj14CL \t 1\n", "\t Conj3ADV \t 1\n", "\t Conj5Pp \t 1\n", "\t Conj6P \t 1\n", "\t Conj7Pp \t 1\n", "\t Conj9Np \t 1\n", "\t ConjConj \t 1\n", "\t EitherAdvpOrPp \t 1\n", "\t EitherOr10Np \t 1\n", "\t EitherOr4Advp \t 1\n", "\t EitherOr4Vp \t 1\n", "\t EitherOr5Vp \t 1\n", "\t EitherOr7CL \t 1\n", "\t EitherOr8Np \t 1\n", "\t IO-ADV-ADV-V-ADV \t 1\n", "\t IO-ADV-S-ADV-V-O-ADV \t 1\n", "\t IO-O-ADV-ADV-ADV \t 1\n", "\t IO-O-ADV-V \t 1\n", "\t IO-O-ADV-V-S \t 1\n", "\t IO-O-S-V-ADV \t 1\n", "\t IO-S-ADV-ADV \t 1\n", "\t IO-S-ADV-V \t 1\n", "\t IO-S-O-V-ADV \t 1\n", "\t IO-S-V-ADV \t 1\n", "\t 
IO-S-V-O-ADV \t 1\n", "\t IO-V-ADV-O \t 1\n", "\t IO-V-O-O2-ADV \t 1\n", "\t NpNpNpNpNpNpNpNpNp \t 1\n", "\t NpNpNpNpNpNpNpNpNpNp \t 1\n", "\t NpNpNpNpNpNpNpNpNpNpNpNpNpNpNpAndNp \t 1\n", "\t NpNpNpNpNpNpNpNpNpNpNpNpNpNpNpNp \t 1\n", "\t O-ADV-ADV-ADV \t 1\n", "\t O-ADV-ADV-ADV-V \t 1\n", "\t O-ADV-ADV-V-IO \t 1\n", "\t O-ADV-IO-V \t 1\n", "\t O-ADV-V-ADV-S \t 1\n", "\t O-ADV-V-ADV-S-ADV-ADV-ADV \t 1\n", "\t O-ADV-V-O2-ADV \t 1\n", "\t O-ADV-V-S-ADV-ADV-ADV-ADV-ADV \t 1\n", "\t O-IO-ADV-ADV-V \t 1\n", "\t O-IO-V-ADV-ADV \t 1\n", "\t O-O2-ADV \t 1\n", "\t O-O2-IO \t 1\n", "\t O-O2-V-ADV \t 1\n", "\t O-O2-V-IO-ADV \t 1\n", "\t O-S \t 1\n", "\t O-S-ADV-V-IO \t 1\n", "\t O-S-O2-V-ADV-ADV \t 1\n", "\t O-S-V-ADV-ADV-ADV \t 1\n", "\t O-V-ADV-S-ADV \t 1\n", "\t O-V-IO-ADV-ADV \t 1\n", "\t O-V-IO-S-O2-ADV \t 1\n", "\t O-V-O2-IO \t 1\n", "\t O-V-S-ADV-ADV-ADV \t 1\n", "\t O-V-S-ADV-IO \t 1\n", "\t O-V-S-O2-ADV \t 1\n", "\t O2-ADV-V-O \t 1\n", "\t O2-O-V-ADV-ADV \t 1\n", "\t O2-V \t 1\n", "\t O2-V-ADV-O \t 1\n", "\t O2-V-ADV-O-ADV \t 1\n", "\t O2-V-S-O \t 1\n", "\t P-ADV-ADV-ADV \t 1\n", "\t P-ADV-ADV-S \t 1\n", "\t P-ADV-ADV-S-VC \t 1\n", "\t P-ADV-S-ADV \t 1\n", "\t P-ADV-S-ADV-VC \t 1\n", "\t P-ADV-S-VC \t 1\n", "\t P-ADV-VC-ADV \t 1\n", "\t P-S-ADV-ADV \t 1\n", "\t P-VC-ADV-S-ADV \t 1\n", "\t PpPpPpPpPpPpPpPpPpPpPpPpPpPpPpPpPpPpPpPpPp \t 1\n", "\t S-ADV-ADV-ADV-ADV-O-V \t 1\n", "\t S-ADV-ADV-ADV-O-IO-V \t 1\n", "\t S-ADV-ADV-ADV-O-V \t 1\n", "\t S-ADV-ADV-ADV-V-ADV \t 1\n", "\t S-ADV-ADV-ADV-V-ADV-ADV \t 1\n", "\t S-ADV-ADV-ADV-V-O \t 1\n", "\t S-ADV-ADV-ADV-V-O-ADV \t 1\n", "\t S-ADV-ADV-IO \t 1\n", "\t S-ADV-ADV-O-V-ADV \t 1\n", "\t S-ADV-ADV-O2-V-O \t 1\n", "\t S-ADV-ADV-P \t 1\n", "\t S-ADV-ADV-P-VC \t 1\n", "\t S-ADV-ADV-V-IO \t 1\n", "\t S-ADV-ADV-V-O-ADV-ADV \t 1\n", "\t S-ADV-ADV-V-O-IO \t 1\n", "\t S-ADV-ADV-VC-P-ADV \t 1\n", "\t S-ADV-IO-O \t 1\n", "\t S-ADV-IO-V \t 1\n", "\t S-ADV-O \t 1\n", "\t S-ADV-O-V-ADV-ADV \t 1\n", "\t S-ADV-O-V-O2-IO \t 1\n", "\t 
S-ADV-O2-V-ADV \t 1\n", "\t S-ADV-P-ADV-ADV \t 1\n", "\t S-ADV-V-ADV-IO \t 1\n", "\t S-ADV-V-ADV-O-ADV-ADV \t 1\n", "\t S-ADV-V-IO-O-ADV \t 1\n", "\t S-ADV-V-O-ADV-O2-ADV \t 1\n", "\t S-ADV-V-O-IO-ADV \t 1\n", "\t S-ADV-VC-P-ADV-ADV \t 1\n", "\t S-IO-O \t 1\n", "\t S-IO-O-ADV-V \t 1\n", "\t S-IO-V-ADV-ADV \t 1\n", "\t S-IO-V-ADV-O \t 1\n", "\t S-IO-V-O-ADV \t 1\n", "\t S-O-ADV-IO-V \t 1\n", "\t S-O-IO-V-ADV \t 1\n", "\t S-O-O2-V \t 1\n", "\t S-O-V-IO-ADV \t 1\n", "\t S-O-V-O2-ADV \t 1\n", "\t S-O2-O-V \t 1\n", "\t S-O2-V-O \t 1\n", "\t S-P-ADV-ADV-ADV \t 1\n", "\t S-P-ADV-ADV-VC \t 1\n", "\t S-V-ADV-ADV-ADV-O \t 1\n", "\t S-V-ADV-O-IO-ADV \t 1\n", "\t S-V-IO-O-ADV-ADV \t 1\n", "\t S-V-O-ADV-ADV-ADV \t 1\n", "\t S-V-O-O2-ADV \t 1\n", "\t S-VC-ADV \t 1\n", "\t S-VC-ADV-ADV-P \t 1\n", "\t S-VC-ADV-P-ADV \t 1\n", "\t S-VC-P-ADV-ADV \t 1\n", "\t S-VC-P-ADV-ADV-ADV \t 1\n", "\t V-ADV-ADV-ADV-O \t 1\n", "\t V-ADV-ADV-ADV-S \t 1\n", "\t V-ADV-ADV-ADV-S-ADV-ADV \t 1\n", "\t V-ADV-IO-ADV-S-ADV \t 1\n", "\t V-ADV-IO-O \t 1\n", "\t V-ADV-O2-ADV \t 1\n", "\t V-IO-ADV-ADV-O \t 1\n", "\t V-O-ADV-ADV-ADV-ADV-ADV \t 1\n", "\t V-O-ADV-ADV-S-ADV \t 1\n", "\t V-O-ADV-IO \t 1\n", "\t V-O-ADV-IO-ADV-ADV \t 1\n", "\t V-O-O2-ADV-ADV-ADV \t 1\n", "\t V-O-O2-IO \t 1\n", "\t V-O-O2-IO-ADV \t 1\n", "\t V-O-S-IO-ADV \t 1\n", "\t V-O-S-O2-ADV \t 1\n", "\t V-O2 \t 1\n", "\t V-O2-O-ADV \t 1\n", "\t V-S-ADV-O-ADV \t 1\n", "\t V-S-ADV-O-ADV-ADV \t 1\n", "\t V-S-ADV-O-IO \t 1\n", "\t V-S-IO-ADV \t 1\n", "\t V-S-IO-O \t 1\n", "\t V-S-IO-O-O2 \t 1\n", "\t V-S-O-IO-ADV \t 1\n", "\t VC-ADV-ADV-S-P \t 1\n", "\t VC-ADV-P-S \t 1\n", "\t VC-P-ADV-S-ADV \t 1\n", "\t VC-P-S-ADV-ADV \t 1\n", "\t VC-S \t 1\n", "\n", "\n" ] } ], "source": [
 "# Print a value/frequency table for every feature named in FeatureList.\n",
 "# NOTE(review): 'wgrole' occurs twice in the tuple, so its table is printed twice; confirm whether that is intended.\n",
 "FeatureList=('bookshort','wgrole','wgtype', 'wgrole', 'degree','gn','junction','mood','nu','person','wgrule')\n",
 "for Feature in FeatureList:\n",
 "    # 'otype' has to be skipped: continue moves on to the next feature, whereas break would end the whole loop.\n",
 "    if Feature=='otype': continue\n",
 "    print ('Feature:',Feature,'\\n\\n\\t value\\t frequency')\n",
 "    # freqList() supplies the (value, frequency) pairs that are unpacked and printed below.\n",
 "    FeatureFrequenceLists=Fs(Feature).freqList()\n",
 "    for item, freq in FeatureFrequenceLists:\n",
 "        print ('\\t',item,'\\t',freq)\n",
 "\n",
 "    print ('\\n')"
 ] }, { "cell_type": "code", "execution_count": null, "id": "01da00bb-1844-41b9-83e9-bda46c69dc5f", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "d9d91d6a-f752-4f7d-9c2c-cb04e5f0e319", "metadata": {}, "source": [ "## Value list of selected features" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 5 }