{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<img align=\"right\" src=\"images/tf-small.png\" width=\"128\"/>\n",
    "<img align=\"right\" src=\"images/phblogo.png\" width=\"128\"/>\n",
    "<img align=\"right\" src=\"images/dans.png\"/>\n",
    "\n",
    "---\n",
    "Start with [convert](https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs/convert.ipynb)\n",
    "\n",
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use the Banks example corpus"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load TF\n",
    "\n",
    "We (down)load the corpus."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tf.app import use"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<b title=\"local commit\">TF-app:</b> <span title=\"#5df379a68800729cc207cde836cd3ba77a0ed018 offline under ~/text-fabric-data\">~/text-fabric-data/annotation/banks/app</span>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<b title=\"local release\">data:</b> <span title=\"rv3.0=#4fc68c6f0bf20ef7070a82ec534416bf39b836a2 offline under ~/text-fabric-data\">~/text-fabric-data/annotation/banks/tf/0.2</span>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This is Text-Fabric 9.2.2\n",
      "Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html\n",
      "\n",
      "10 features found and 0 ignored\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<b>Text-Fabric:</b> <a target=\"_blank\" href=\"https://annotation.github.io/text-fabric/tf/cheatsheet.html\" title=\"text-fabric-api\">Text-Fabric API 9.2.2</a>, <a target=\"_blank\" href=\"https://github.com/annotation/banks/blob/master/app\" title=\"annotation/banks TF-app\">annotation/banks/app  v3</a>, <a target=\"_blank\" href=\"https://annotation.github.io/text-fabric/tf/about/searchusage.html\" title=\"Search Templates Introduction and Reference\">Search Reference</a><br><b>Data:</b> <a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs/convert.ipynb\" title=\"provenance of Two quotes from Consider Phlebas by Iain M. Banks\">BANKS</a>, <a target=\"_blank\" href=\"https://annotation.github.io/text-fabric/tf/writing/.html\" title=\"How TF features represent text\">Character table</a>, <a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs\" title=\"BANKS feature documentation\">Feature docs</a><br><b>Features:</b><br>\n",
       "<details><summary><b>Two quotes from Consider Phlebas by Iain M. Banks</b></summary>\n",
       "    <div class=\"fcorpus\">\n",
       "\n",
       "<div class=\"frow\">\n",
       "    <div class=\"fnamecat \">\n",
       "<a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs\" title=\"~/text-fabric-data/annotation/banks/tf/0.2/author.tf\">author</a>\n",
       "</div>\n",
       "<div class=\"fmono\">str</div>\n",
       "<details>\n",
       "    <summary>the author of a book</summary>\n",
       "    <div class=\"fmeta\">\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">compiler:</div>\n",
       "    <div>Dirk Roorda</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">dateWritten:</div>\n",
       "    <div>2020-02-13T13:37:47Z</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">name:</div>\n",
       "    <div>Culture quotes from Iain Banks</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">purpose:</div>\n",
       "    <div>exposition</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">source:</div>\n",
       "    <div>Good Reads</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">status:</div>\n",
       "    <div>with for similarities in a separate module</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">url:</div>\n",
       "    <div>https://www.goodreads.com/work/quotes/14366-consider-phlebas</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">version:</div>\n",
       "    <div>0.2</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">writtenBy:</div>\n",
       "    <div>Text-Fabric</div>\n",
       "</div>\n",
       "\n",
       "        </div>\n",
       "    </details>\n",
       "</div>\n",
       "\n",
       "<div class=\"frow\">\n",
       "    <div class=\"fnamecat \">\n",
       "<a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs\" title=\"~/text-fabric-data/annotation/banks/tf/0.2/gap.tf\">gap</a>\n",
       "</div>\n",
       "<div class=\"fmono\">int</div>\n",
       "<details>\n",
       "    <summary>1 for words that occur between [ ], which are inserted by the editor</summary>\n",
       "    <div class=\"fmeta\">\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">compiler:</div>\n",
       "    <div>Dirk Roorda</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">dateWritten:</div>\n",
       "    <div>2020-02-13T13:37:47Z</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">name:</div>\n",
       "    <div>Culture quotes from Iain Banks</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">purpose:</div>\n",
       "    <div>exposition</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">source:</div>\n",
       "    <div>Good Reads</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">status:</div>\n",
       "    <div>with for similarities in a separate module</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">url:</div>\n",
       "    <div>https://www.goodreads.com/work/quotes/14366-consider-phlebas</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">version:</div>\n",
       "    <div>0.2</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">writtenBy:</div>\n",
       "    <div>Text-Fabric</div>\n",
       "</div>\n",
       "\n",
       "        </div>\n",
       "    </details>\n",
       "</div>\n",
       "\n",
       "<div class=\"frow\">\n",
       "    <div class=\"fnamecat \">\n",
       "<a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs\" title=\"~/text-fabric-data/annotation/banks/tf/0.2/letters.tf\">letters</a>\n",
       "</div>\n",
       "<div class=\"fmono\">str</div>\n",
       "<details>\n",
       "    <summary>the letters of a word</summary>\n",
       "    <div class=\"fmeta\">\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">compiler:</div>\n",
       "    <div>Dirk Roorda</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">dateWritten:</div>\n",
       "    <div>2020-02-13T13:37:47Z</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">name:</div>\n",
       "    <div>Culture quotes from Iain Banks</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">purpose:</div>\n",
       "    <div>exposition</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">source:</div>\n",
       "    <div>Good Reads</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">status:</div>\n",
       "    <div>with for similarities in a separate module</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">url:</div>\n",
       "    <div>https://www.goodreads.com/work/quotes/14366-consider-phlebas</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">version:</div>\n",
       "    <div>0.2</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">writtenBy:</div>\n",
       "    <div>Text-Fabric</div>\n",
       "</div>\n",
       "\n",
       "        </div>\n",
       "    </details>\n",
       "</div>\n",
       "\n",
       "<div class=\"frow\">\n",
       "    <div class=\"fnamecat \">\n",
       "<a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs\" title=\"~/text-fabric-data/annotation/banks/tf/0.2/number.tf\">number</a>\n",
       "</div>\n",
       "<div class=\"fmono\">int</div>\n",
       "<details>\n",
       "    <summary>number of chapter, or sentence in chapter, or line in sentence</summary>\n",
       "    <div class=\"fmeta\">\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">compiler:</div>\n",
       "    <div>Dirk Roorda</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">dateWritten:</div>\n",
       "    <div>2020-02-13T13:37:47Z</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">name:</div>\n",
       "    <div>Culture quotes from Iain Banks</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">purpose:</div>\n",
       "    <div>exposition</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">source:</div>\n",
       "    <div>Good Reads</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">status:</div>\n",
       "    <div>with for similarities in a separate module</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">url:</div>\n",
       "    <div>https://www.goodreads.com/work/quotes/14366-consider-phlebas</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">version:</div>\n",
       "    <div>0.2</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">writtenBy:</div>\n",
       "    <div>Text-Fabric</div>\n",
       "</div>\n",
       "\n",
       "        </div>\n",
       "    </details>\n",
       "</div>\n",
       "\n",
       "<div class=\"frow\">\n",
       "    <div class=\"fnamecat \">\n",
       "<a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs\" title=\"~/text-fabric-data/annotation/banks/tf/0.2/otype.tf\">otype</a>\n",
       "</div>\n",
       "<div class=\"fmono\">str</div>\n",
       "<details>\n",
       "    <summary></summary>\n",
       "    <div class=\"fmeta\">\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">compiler:</div>\n",
       "    <div>Dirk Roorda</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">dateWritten:</div>\n",
       "    <div>2020-02-13T13:37:47Z</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">name:</div>\n",
       "    <div>Culture quotes from Iain Banks</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">purpose:</div>\n",
       "    <div>exposition</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">source:</div>\n",
       "    <div>Good Reads</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">status:</div>\n",
       "    <div>with for similarities in a separate module</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">url:</div>\n",
       "    <div>https://www.goodreads.com/work/quotes/14366-consider-phlebas</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">version:</div>\n",
       "    <div>0.2</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">writtenBy:</div>\n",
       "    <div>Text-Fabric</div>\n",
       "</div>\n",
       "\n",
       "        </div>\n",
       "    </details>\n",
       "</div>\n",
       "\n",
       "<div class=\"frow\">\n",
       "    <div class=\"fnamecat \">\n",
       "<a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs\" title=\"~/text-fabric-data/annotation/banks/tf/0.2/punc.tf\">punc</a>\n",
       "</div>\n",
       "<div class=\"fmono\">str</div>\n",
       "<details>\n",
       "    <summary>the punctuation after a word</summary>\n",
       "    <div class=\"fmeta\">\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">compiler:</div>\n",
       "    <div>Dirk Roorda</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">dateWritten:</div>\n",
       "    <div>2020-02-13T13:37:47Z</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">name:</div>\n",
       "    <div>Culture quotes from Iain Banks</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">purpose:</div>\n",
       "    <div>exposition</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">remark:</div>\n",
       "    <div>a bit more info is needed</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">source:</div>\n",
       "    <div>Good Reads</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">status:</div>\n",
       "    <div>with for similarities in a separate module</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">url:</div>\n",
       "    <div>https://www.goodreads.com/work/quotes/14366-consider-phlebas</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">version:</div>\n",
       "    <div>0.2</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">writtenBy:</div>\n",
       "    <div>Text-Fabric</div>\n",
       "</div>\n",
       "\n",
       "        </div>\n",
       "    </details>\n",
       "</div>\n",
       "\n",
       "<div class=\"frow\">\n",
       "    <div class=\"fnamecat \">\n",
       "<a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs\" title=\"~/text-fabric-data/annotation/banks/tf/0.2/terminator.tf\">terminator</a>\n",
       "</div>\n",
       "<div class=\"fmono\">str</div>\n",
       "<details>\n",
       "    <summary>the last character of a line</summary>\n",
       "    <div class=\"fmeta\">\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">compiler:</div>\n",
       "    <div>Dirk Roorda</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">dateWritten:</div>\n",
       "    <div>2020-02-13T13:37:47Z</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">name:</div>\n",
       "    <div>Culture quotes from Iain Banks</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">purpose:</div>\n",
       "    <div>exposition</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">source:</div>\n",
       "    <div>Good Reads</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">status:</div>\n",
       "    <div>with for similarities in a separate module</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">url:</div>\n",
       "    <div>https://www.goodreads.com/work/quotes/14366-consider-phlebas</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">version:</div>\n",
       "    <div>0.2</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">writtenBy:</div>\n",
       "    <div>Text-Fabric</div>\n",
       "</div>\n",
       "\n",
       "        </div>\n",
       "    </details>\n",
       "</div>\n",
       "\n",
       "<div class=\"frow\">\n",
       "    <div class=\"fnamecat \">\n",
       "<a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs\" title=\"~/text-fabric-data/annotation/banks/tf/0.2/title.tf\">title</a>\n",
       "</div>\n",
       "<div class=\"fmono\">str</div>\n",
       "<details>\n",
       "    <summary>the title of a book</summary>\n",
       "    <div class=\"fmeta\">\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">compiler:</div>\n",
       "    <div>Dirk Roorda</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">dateWritten:</div>\n",
       "    <div>2020-02-13T13:37:47Z</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">name:</div>\n",
       "    <div>Culture quotes from Iain Banks</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">purpose:</div>\n",
       "    <div>exposition</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">source:</div>\n",
       "    <div>Good Reads</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">status:</div>\n",
       "    <div>with for similarities in a separate module</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">url:</div>\n",
       "    <div>https://www.goodreads.com/work/quotes/14366-consider-phlebas</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">version:</div>\n",
       "    <div>0.2</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">writtenBy:</div>\n",
       "    <div>Text-Fabric</div>\n",
       "</div>\n",
       "\n",
       "        </div>\n",
       "    </details>\n",
       "</div>\n",
       "\n",
       "<div class=\"frow\">\n",
       "    <div class=\"fnamecat edge\">\n",
       "<a target=\"_blank\" href=\"https://nbviewer.jupyter.org/github/annotation/banks/blob/master/programs\" title=\"~/text-fabric-data/annotation/banks/tf/0.2/oslots.tf\">oslots</a>\n",
       "</div>\n",
       "<div class=\"fmono\">none</div>\n",
       "<details>\n",
       "    <summary></summary>\n",
       "    <div class=\"fmeta\">\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">compiler:</div>\n",
       "    <div>Dirk Roorda</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">dateWritten:</div>\n",
       "    <div>2020-02-13T13:37:47Z</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">name:</div>\n",
       "    <div>Culture quotes from Iain Banks</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">purpose:</div>\n",
       "    <div>exposition</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">source:</div>\n",
       "    <div>Good Reads</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">status:</div>\n",
       "    <div>with for similarities in a separate module</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">url:</div>\n",
       "    <div>https://www.goodreads.com/work/quotes/14366-consider-phlebas</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">version:</div>\n",
       "    <div>0.2</div>\n",
       "</div>\n",
       "\n",
       "<div class=\"fmetarow\">\n",
       "    <div class=\"fmetakey\">writtenBy:</div>\n",
       "    <div>Text-Fabric</div>\n",
       "</div>\n",
       "\n",
       "        </div>\n",
       "    </details>\n",
       "</div>\n",
       "\n",
       "    </div>\n",
       "</details>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<style>tr.tf.ltr, td.tf.ltr, th.tf.ltr { text-align: left ! important;}\n",
       "tr.tf.rtl, td.tf.rtl, th.tf.rtl { text-align: right ! important;}\n",
       "@font-face {\n",
       "  font-family: \"Gentium Plus\";\n",
       "  src: local('Gentium Plus'), local('GentiumPlus'),\n",
       "    url('/server/static/fonts/GentiumPlus-R.woff') format('woff'),\n",
       "    url('https://github.com/annotation/text-fabric/blob/master/tf/server/static/fonts/GentiumPlus-R.woff?raw=true') format('woff');\n",
       "}\n",
       "\n",
       "@font-face {\n",
       "  font-family: \"Ezra SIL\";\n",
       "  src: local('Ezra SIL'), local('EzraSIL'),\n",
       "    url('/server/static/fonts/SILEOT.woff') format('woff'),\n",
       "    url('https://github.com/annotation/text-fabric/blob/master/tf/server/static/fonts/SILEOT.woff?raw=true') format('woff');\n",
       "}\n",
       "\n",
       "@font-face {\n",
       "  font-family: \"SBL Hebrew\";\n",
       "  src: local('SBL Hebrew'), local('SBLHebrew'),\n",
       "    url('/server/static/fonts/SBL_Hbrw.woff') format('woff'),\n",
       "    url('https://github.com/annotation/text-fabric/blob/master/tf/server/static/fonts/SBL_Hbrw.woff?raw=true') format('woff');\n",
       "}\n",
       "\n",
       "@font-face {\n",
       "  font-family: \"Estrangelo Edessa\";\n",
       "  src: local('Estrangelo Edessa'), local('EstrangeloEdessa');\n",
       "    url('/server/static/fonts/SyrCOMEdessa.woff') format('woff'),\n",
       "    url('https://github.com/annotation/text-fabric/blob/master/tf/server/static/fonts/SyrCOMEdessa.woff?raw=true') format('woff');\n",
       "}\n",
       "\n",
       "@font-face {\n",
       "  font-family: AmiriQuran;\n",
       "  font-style: normal;\n",
       "  font-weight: 400;\n",
       "  src: local('Amiri Quran'), local('AmiriQuran'),\n",
       "    url('/server/static/fonts/AmiriQuran.woff') format('woff'),\n",
       "    url('https://github.com/annotation/text-fabric/blob/master/tf/server/static/fonts/AmiriQuran.woff?raw=true') format('woff');\n",
       "}\n",
       "\n",
       "@font-face {\n",
       "  font-family: AmiriQuranColored;\n",
       "  font-style: normal;\n",
       "  font-weight: 400;\n",
       "  src: local('Amiri Quran Colored'), local('AmiriQuranColored'),\n",
       "    url('/server/static/fonts/AmiriQuranColored.woff') format('woff'),\n",
       "    url('https://github.com/annotation/text-fabric/blob/master/tf/server/static/fonts/AmiriQuranColored.woff?raw=true') format('woff');\n",
       "}\n",
       "\n",
       "@font-face {\n",
       "  font-family: \"Santakku\";\n",
       "  src: local('Santakku'),\n",
       "    url('/server/static/fonts/Santakku.woff') format('woff'),\n",
       "    url('https://github.com/annotation/text-fabric/blob/master/tf/server/static/fonts/Santakku.woff?raw=true') format('woff');\n",
       "}\n",
       "\n",
       "@font-face {\n",
       "  font-family: \"SantakkuM\";\n",
       "  src: local('SantakkuM'),\n",
       "    url('/server/static/fonts/SantakkuM.woff') format('woff'),\n",
       "    url('https://github.com/annotation/text-fabric/blob/master/tf/server/static/fonts/SantakkuM.woff?raw=true') format('woff');\n",
       "}\n",
       "/* bypassing some classical notebook settings */\n",
       "div#notebook {\n",
       "  line-height: unset;\n",
       "}\n",
       "/* neutral text */\n",
       ".txtn,.txtn a:visited,.txtn a:link {\n",
       "    font-family: sans-serif;\n",
       "    font-size: medium;\n",
       "    direction: ltr;\n",
       "    unicode-bidi: embed;\n",
       "    text-decoration: none;\n",
       "    color: var(--text-color);\n",
       "}\n",
       "/* transcription text */\n",
       ".txtt,.txtt a:visited,.txtt a:link {\n",
       "    font-family: monospace;\n",
       "    font-size: medium;\n",
       "    direction: ltr;\n",
       "    unicode-bidi: embed;\n",
       "    text-decoration: none;\n",
       "    color: var(--text-color);\n",
       "}\n",
       "/* source text */\n",
       ".txto,.txto a:visited,.txto a:link {\n",
       "    font-family: serif;\n",
       "    font-size: medium;\n",
       "    direction: ltr;\n",
       "    unicode-bidi: embed;\n",
       "    text-decoration: none;\n",
       "    color: var(--text-color);\n",
       "}\n",
       "/* phonetic text */\n",
       ".txtp,.txtp a:visited,.txtp a:link {\n",
       "    font-family: Gentium, sans-serif;\n",
       "    font-size: medium;\n",
       "    direction: ltr;\n",
       "    unicode-bidi: embed;\n",
       "    text-decoration: none;\n",
       "    color: var(--text-color);\n",
       "}\n",
       "/* original script text */\n",
       ".txtu,.txtu a:visited,.txtu a:link {\n",
       "    font-family: Gentium, sans-serif;\n",
       "    font-size: medium;\n",
       "    text-decoration: none;\n",
       "    color: var(--text-color);\n",
       "}\n",
       "/* hebrew */\n",
       ".txtu.hbo,.lex.hbo {\n",
       "    font-family: \"Ezra SIL\", \"SBL Hebrew\", sans-serif;\n",
       "    font-size: large;\n",
       "    direction: rtl ! important;\n",
       "    unicode-bidi: embed;\n",
       "}\n",
       "/* syriac */\n",
       ".txtu.syc,.lex.syc {\n",
       "    font-family: \"Estrangelo Edessa\", sans-serif;\n",
       "    font-size: medium;\n",
       "    direction: rtl ! important;\n",
       "    unicode-bidi: embed;\n",
       "}\n",
       "/* neo aramaic */\n",
       ".txtu.cld,.lex.cld {\n",
       "    font-family: \"CharisSIL-R\", sans-serif;\n",
       "    font-size: medium;\n",
       "    direction: ltr ! important;\n",
       "    unicode-bidi: embed;\n",
       "}\n",
       "/* standard arabic */\n",
       ".txtu.ara,.lex.ara {\n",
       "    font-family: \"AmiriQuran\", sans-serif;\n",
       "    font-size: large;\n",
       "    direction: rtl ! important;\n",
       "    unicode-bidi: embed;\n",
       "}\n",
       "/* cuneiform */\n",
       ".txtu.akk,.lex.akk {\n",
       "    font-family: Santakku, sans-serif;\n",
       "    font-size: large;\n",
       "    direction: ltr ! important;\n",
       "    unicode-bidi: embed;\n",
       "}\n",
       "/* greek */\n",
       ".txtu.grc,.lex.grc a:link {\n",
       "    font-family: Gentium, sans-serif;\n",
       "    font-size: medium;\n",
       "    direction: ltr ! important;\n",
       "    unicode-bidi: embed;\n",
       "}\n",
       "a:hover {\n",
       "    text-decoration: underline | important;\n",
       "    color: #0000ff | important;\n",
       "}\n",
       ".ltr {\n",
       "    direction: ltr ! important;\n",
       "    unicode-bidi: embed;\n",
       "}\n",
       ".rtl {\n",
       "    direction: rtl ! important;\n",
       "    unicode-bidi: embed;\n",
       "}\n",
       ".ubd {\n",
       "    unicode-bidi: embed;\n",
       "}\n",
       ".col {\n",
       "   display: inline-block;\n",
       "}\n",
       ".features {\n",
       "    font-family: monospace;\n",
       "    font-size: medium;\n",
       "    font-weight: bold;\n",
       "    color: var(--features);\n",
       "    display: flex;\n",
       "    flex-flow: column nowrap;\n",
       "    justify-content: flex-start;\n",
       "    align-items: flex-start;\n",
       "    align-content: flex-start;\n",
       "    padding: 2px;\n",
       "    margin: 2px;\n",
       "    direction: ltr;\n",
       "    unicode-bidi: embed;\n",
       "    border: var(--meta-width) solid var(--meta-color);\n",
       "    border-radius: var(--meta-width);\n",
       "}\n",
       ".features div,.features span {\n",
       "    padding: 0;\n",
       "    margin: -2px 0;\n",
       "}\n",
       ".features .f {\n",
       "    font-family: sans-serif;\n",
       "    font-size: small;\n",
       "    font-weight: normal;\n",
       "    color: #5555bb;\n",
       "}\n",
       ".features .xft {\n",
       "  color: #000000;\n",
       "  background-color: #eeeeee;\n",
       "  font-size: medium;\n",
       "  margin: 2px 0px;\n",
       "}\n",
       ".features .xft .f {\n",
       "  color: #000000;\n",
       "  background-color: #eeeeee;\n",
       "  font-size: small;\n",
       "  font-weight: normal;\n",
       "}\n",
       ".tfsechead {\n",
       "    font-family: sans-serif;\n",
       "    font-size: small;\n",
       "    font-weight: bold;\n",
       "    color: var(--tfsechead);\n",
       "    unicode-bidi: embed;\n",
       "    text-align: start;\n",
       "}\n",
       ".structure {\n",
       "    font-family: sans-serif;\n",
       "    font-size: small;\n",
       "    font-weight: bold;\n",
       "    color: var(--structure);\n",
       "    unicode-bidi: embed;\n",
       "    text-align: start;\n",
       "}\n",
       ".comments {\n",
       "    display: flex;\n",
       "    justify-content: flex-start;\n",
       "    align-items: flex-start;\n",
       "    align-content: flex-start;\n",
       "    flex-flow: column nowrap;\n",
       "}\n",
       ".nd, a:link.nd {\n",
       "    font-family: sans-serif;\n",
       "    font-size: small;\n",
       "    color: var(--node);\n",
       "    vertical-align: super;\n",
       "    direction: ltr ! important;\n",
       "    unicode-bidi: embed;\n",
       "}\n",
       ".lex {\n",
       "  color: var(--lex-color);;\n",
       "}\n",
       ".children,.children.ltr {\n",
       "    display: flex;\n",
       "    border: 0;\n",
       "    background-color: #ffffff;\n",
       "    justify-content: flex-start;\n",
       "    align-items: flex-start;\n",
       "    align-content: flex-start;\n",
       "}\n",
       ".children.stretch {\n",
       "    align-items: stretch;\n",
       "}\n",
       ".children.hor {\n",
       "    flex-flow: row nowrap;\n",
       "}\n",
       ".children.hor.wrap {\n",
       "    flex-flow: row wrap;\n",
       "}\n",
       ".children.ver {\n",
       "    flex-flow: column nowrap;\n",
       "}\n",
       ".children.ver.wrap {\n",
       "    flex-flow: column wrap;\n",
       "}\n",
       ".contnr {\n",
       "    width: fit-content;\n",
       "    display: flex;\n",
       "    justify-content: flex-start;\n",
       "    align-items: flex-start;\n",
       "    align-content: flex-start;\n",
       "    flex-flow: column nowrap;\n",
       "    background: #ffffff none repeat scroll 0 0;\n",
       "    padding:  10px 2px 2px 2px;\n",
       "    margin: 16px 2px 2px 2px;\n",
       "    border-style: solid;\n",
       "    font-size: small;\n",
       "}\n",
       ".contnr.trm {\n",
       "    background-attachment: local;\n",
       "}\n",
       ".contnr.cnul {\n",
       "    padding:  0;\n",
       "    margin: 0;\n",
       "    border-style: solid;\n",
       "    font-size: xx-small;\n",
       "}\n",
       ".contnr.cnul,.lbl.cnul {\n",
       "    border-color: var(--border-color-nul);\n",
       "    border-width: var(--border-width-nul);\n",
       "    border-radius: var(--border-width-nul);\n",
       "}\n",
       ".contnr.c0,.lbl.c0 {\n",
       "    border-color: var(--border-color0);\n",
       "    border-width: var(--border-width0);\n",
       "    border-radius: var(--border-width0);\n",
       "}\n",
       ".contnr.c1,.lbl.c1 {\n",
       "    border-color: var(--border-color1);\n",
       "    border-width: var(--border-width1);\n",
       "    border-radius: var(--border-width1);\n",
       "}\n",
       ".contnr.c2,.lbl.c2 {\n",
       "    border-color: var(--border-color2);\n",
       "    border-width: var(--border-width2);\n",
       "    border-radius: var(--border-width2);\n",
       "}\n",
       ".contnr.c3,.lbl.c3 {\n",
       "    border-color: var(--border-color3);\n",
       "    border-width: var(--border-width3);\n",
       "    border-radius: var(--border-width3);\n",
       "}\n",
       ".contnr.c4,.lbl.c4 {\n",
       "    border-color: var(--border-color4);\n",
       "    border-width: var(--border-width4);\n",
       "    border-radius: var(--border-width4);\n",
       "}\n",
       "span.plain {\n",
       "    display: inline-block;\n",
       "    white-space: pre-wrap;\n",
       "}\n",
       ".plain {\n",
       "    background-color: #ffffff;\n",
       "}\n",
       ".plain.l,.contnr.l,.contnr.l>.lbl {\n",
       "    border-left-style: dotted\n",
       "}\n",
       ".plain.r,.contnr.r,.contnr.r>.lbl {\n",
       "    border-right-style: dotted\n",
       "}\n",
       ".plain.lno,.contnr.lno,.contnr.lno>.lbl {\n",
       "    border-left-style: none\n",
       "}\n",
       ".plain.rno,.contnr.rno,.contnr.rno>.lbl {\n",
       "    border-right-style: none\n",
       "}\n",
       ".plain.l {\n",
       "    padding-left: 4px;\n",
       "    margin-left: 2px;\n",
       "    border-width: var(--border-width-plain);\n",
       "}\n",
       ".plain.r {\n",
       "    padding-right: 4px;\n",
       "    margin-right: 2px;\n",
       "    border-width: var(--border-width-plain);\n",
       "}\n",
       ".lbl {\n",
       "    font-family: monospace;\n",
       "    margin-top: -24px;\n",
       "    margin-left: 20px;\n",
       "    background: #ffffff none repeat scroll 0 0;\n",
       "    padding: 0 6px;\n",
       "    border-style: solid;\n",
       "    display: block;\n",
       "    color: var(--label)\n",
       "}\n",
       ".lbl.trm {\n",
       "    background-attachment: local;\n",
       "    margin-top: 2px;\n",
       "    margin-left: 2px;\n",
       "    padding: 2px 2px;\n",
       "    border-style: none;\n",
       "}\n",
       ".lbl.cnul {\n",
       "    font-size: xx-small;\n",
       "}\n",
       ".lbl.c0 {\n",
       "    font-size: small;\n",
       "}\n",
       ".lbl.c1 {\n",
       "    font-size: small;\n",
       "}\n",
       ".lbl.c2 {\n",
       "    font-size: medium;\n",
       "}\n",
       ".lbl.c3 {\n",
       "    font-size: medium;\n",
       "}\n",
       ".lbl.c4 {\n",
       "    font-size: large;\n",
       "}\n",
       ".occs, a:link.occs {\n",
       "    font-size: small;\n",
       "}\n",
       "\n",
       "/* PROVENANCE */\n",
       "\n",
       "div.prov {\n",
       "\tmargin: 40px;\n",
       "\tpadding: 20px;\n",
       "\tborder: 2px solid var(--fog-rim);\n",
       "}\n",
       "div.pline {\n",
       "\tdisplay: flex;\n",
       "\tflex-flow: row nowrap;\n",
       "\tjustify-content: stretch;\n",
       "\talign-items: baseline;\n",
       "}\n",
       "div.p2line {\n",
       "\tmargin-left: 2em;\n",
       "\tdisplay: flex;\n",
       "\tflex-flow: row nowrap;\n",
       "\tjustify-content: stretch;\n",
       "\talign-items: baseline;\n",
       "}\n",
       "div.psline {\n",
       "\tdisplay: flex;\n",
       "\tflex-flow: row nowrap;\n",
       "\tjustify-content: stretch;\n",
       "\talign-items: baseline;\n",
       "\tbackground-color: var(--gold-mist-back);\n",
       "}\n",
       "div.pname {\n",
       "\tflex: 0 0 5rem;\n",
       "\tfont-weight: bold;\n",
       "}\n",
       "div.pval {\n",
       "    flex: 1 1 auto;\n",
       "}\n",
       "\n",
       "/* KEYBOARD */\n",
       ".ccoff {\n",
       "  background-color: inherit;\n",
       "}\n",
       ".ccon {\n",
       "  background-color: yellow ! important;\n",
       "}\n",
       "/* TF header */\n",
       "\n",
       "summary {\n",
       "  /* needed to override the normalize.less\n",
       "   * in the classical jupyter notebook\n",
       "   */\n",
       "  display: list-item ! important;\n",
       "}\n",
       "\n",
       ".fcorpus {\n",
       "  display: flex;\n",
       "  flex-flow: column nowrap;\n",
       "  justify-content: flex-start;\n",
       "  align-items: flex-start;\n",
       "  align-content: flex-start;\n",
       "}\n",
       ".frow {\n",
       "  display: flex;\n",
       "  flex-flow: row nowrap;\n",
       "  justify-content: flex-start;\n",
       "  align-items: flex-start;\n",
       "  align-content: flex-start;\n",
       "}\n",
       ".fmeta {\n",
       "  display: flex;\n",
       "  flex-flow: column nowrap;\n",
       "  justify-content: flex-start;\n",
       "  align-items: flex-start;\n",
       "  align-content: flex-start;\n",
       "}\n",
       ".fmetarow {\n",
       "  display: flex;\n",
       "  flex-flow: row nowrap;\n",
       "  justify-content: flex-start;\n",
       "  align-items: flex-start;\n",
       "  align-content: flex-start;\n",
       "}\n",
       ".fmetakey {\n",
       "  min-width: 10rem;\n",
       "  font-family: monospace;\n",
       "}\n",
       ".fnamecat {\n",
       "  min-width: 10rem;\n",
       "}\n",
       ".fnamecat.edge {\n",
       "  font-weight: bold;\n",
       "  font-style: italic;\n",
       "}\n",
       ".fmono {\n",
       "    font-family: monospace;\n",
       "}\n",
       "\n",
       ":root {\n",
       "\t--node:               hsla(120, 100%,  20%, 1.0  );\n",
       "\t--label:              hsla(  0, 100%,  20%, 1.0  );\n",
       "\t--tfsechead:          hsla(  0, 100%,  25%, 1.0  );\n",
       "\t--structure:          hsla(120, 100%,  25%, 1.0  );\n",
       "\t--features:           hsla(  0,   0%,  30%, 1.0  );\n",
       "  --text-color:         hsla( 60,  80%,  10%, 1.0  );\n",
       "  --lex-color:          hsla(220,  90%,  60%, 1.0  );\n",
       "  --meta-color:         hsla(  0,   0%,  90%, 0.7  );\n",
       "  --meta-width:         3px;\n",
       "  --border-color-nul:   hsla(  0,   0%,  90%, 0.5  );\n",
       "  --border-color0:      hsla(  0,   0%,  90%, 0.9  );\n",
       "  --border-color1:      hsla(  0,   0%,  80%, 0.9  );\n",
       "  --border-color2:      hsla(  0,   0%,  70%, 0.9  );\n",
       "  --border-color3:      hsla(  0,   0%,  80%, 0.8  );\n",
       "  --border-color4:      hsla(  0,   0%,  60%, 0.9  );\n",
       "  --border-width-nul:   2px;\n",
       "  --border-width0:      2px;\n",
       "  --border-width1:      3px;\n",
       "  --border-width2:      4px;\n",
       "  --border-width3:      6px;\n",
       "  --border-width4:      5px;\n",
       "  --border-width-plain: 2px;\n",
       "}\n",
       ".hl {\n",
       "  background-color: var(--hl-strong);\n",
       "}\n",
       "span.hl {\n",
       "\tbackground-color: var(--hl-strong);\n",
       "\tborder-width: 0;\n",
       "\tborder-radius: 2px;\n",
       "\tborder-style: solid;\n",
       "}\n",
       "div.contnr.hl,div.lbl.hl {\n",
       "  background-color: var(--hl-strong);\n",
       "}\n",
       "div.contnr.hl {\n",
       "  border-color: var(--hl-rim) ! important;\n",
       "\tborder-width: 4px ! important;\n",
       "}\n",
       "\n",
       "span.hlbx {\n",
       "\tborder-color: var(--hl-rim);\n",
       "\tborder-width: 4px ! important;\n",
       "\tborder-style: solid;\n",
       "\tborder-radius: 6px;\n",
       "  padding: 4px;\n",
       "  margin: 4px;\n",
       "}\n",
       "\n",
       "span.plain {\n",
       "  display: inline-block;\n",
       "  white-space: pre-wrap;\n",
       "}\n",
       "\n",
       ":root {\n",
       "\t--hl-strong:        hsla( 60, 100%,  70%, 0.9  );\n",
       "\t--hl-rim:           hsla( 55,  80%,  50%, 1.0  );\n",
       "}\n",
       "</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "\n",
       "<script>\n",
       "const copyChar = (el, c) => {\n",
       "    for (const el of document.getElementsByClassName('ccon')) {\n",
       "        el.className = 'ccoff'\n",
       "    }\n",
       "    el.className = 'ccon'\n",
       "    navigator.clipboard.writeText(String.fromCharCode(c))\n",
       "}\n",
       "</script>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div><b>Text-Fabric API:</b> names <a target=\"_blank\" href=\"https://annotation.github.io/text-fabric/tf/cheatsheet.html\" title=\"doc\">N F E L T S C TF</a> directly usable</div><hr>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "A = use(\"annotation/banks\", hoist=globals())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Exploration\n",
    "\n",
    "Let's explore this corpus by means of Text-Fabric.\n",
    "\n",
    "### Frequency list\n",
    "\n",
    "We can get ordered frequency lists for the values of all features.\n",
    "\n",
    "First the words:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(('the', 8),\n",
       " ('of', 5),\n",
       " ('and', 4),\n",
       " ('in', 3),\n",
       " ('we', 3),\n",
       " ('everything', 2),\n",
       " ('know', 2),\n",
       " ('most', 2),\n",
       " ('ones', 2),\n",
       " ('patterns', 2),\n",
       " ('us', 2),\n",
       " ('Besides', 1),\n",
       " ('Culture', 1),\n",
       " ('Everything', 1),\n",
       " ('So', 1),\n",
       " ('a', 1),\n",
       " ('about', 1),\n",
       " ('aid', 1),\n",
       " ('any', 1),\n",
       " ('around', 1),\n",
       " ('as', 1),\n",
       " ('barbarian', 1),\n",
       " ('bottom', 1),\n",
       " ('can', 1),\n",
       " ('care', 1),\n",
       " ('climbing', 1),\n",
       " ('composed', 1),\n",
       " ('control', 1),\n",
       " ('dead', 1),\n",
       " ('elegant', 1),\n",
       " ('enjoyable', 1),\n",
       " ('final', 1),\n",
       " ('find', 1),\n",
       " ('free', 1),\n",
       " ('games', 1),\n",
       " ('good', 1),\n",
       " ('harness', 1),\n",
       " ('have', 1),\n",
       " ('high', 1),\n",
       " ('humans', 1),\n",
       " ('impossible', 1),\n",
       " ('is', 1),\n",
       " ('it', 1),\n",
       " ('languages', 1),\n",
       " ('left', 1),\n",
       " ('life', 1),\n",
       " ('line', 1),\n",
       " ('make', 1),\n",
       " ('mattered', 1),\n",
       " ('mountains', 1),\n",
       " ('not', 1),\n",
       " ('nothing', 1),\n",
       " ('our', 1),\n",
       " ('over', 1),\n",
       " ('own', 1),\n",
       " ('problems', 1),\n",
       " ('really', 1),\n",
       " ('romance', 1),\n",
       " ('safety', 1),\n",
       " ('societies', 1),\n",
       " ('sports', 1),\n",
       " ('studying', 1),\n",
       " ('such', 1),\n",
       " ('take', 1),\n",
       " ('terms', 1),\n",
       " ('that', 1),\n",
       " ('that’s', 1),\n",
       " ('things', 1),\n",
       " ('those', 1),\n",
       " ('to', 1),\n",
       " ('truth', 1),\n",
       " ('ultimately', 1),\n",
       " ('where', 1),\n",
       " ('why', 1),\n",
       " ('without', 1))"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "F.letters.freqList()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For the node types we can get info by calling this:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(('book', 99.0, 100, 100),\n",
       " ('chapter', 49.5, 101, 102),\n",
       " ('sentence', 33.0, 115, 117),\n",
       " ('line', 7.666666666666667, 103, 114),\n",
       " ('word', 1, 1, 99))"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "C.levels.data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It means that chapters are 49.5 words long on average, and that the chapter nodes are 101 and 102.\n",
    "\n",
    "And you see that we have 99 words."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Add to the banks corpus\n",
    "\n",
    "We are going to make a relationship between each pair of words, and we annotate each related pair with how similar they are.\n",
    "\n",
    "We measure the similarity by looking at the distinct letters in each word (lowercase), and computing the percentage of\n",
    "how many letters they have in common with respect to how many letters they jointly have.\n",
    "\n",
    "This will become a symmetric edge feature. Symmetric means, that if a and b are similar, then b and a as well, with the\n",
    "same similarity.\n",
    "\n",
    "We only store one copy of each symmetric pair of edges.\n",
    "\n",
    "We can then use\n",
    "[`E.sim.b(node)`](https://annotation.github.io/text-fabric/tf/core/edgefeature.html)\n",
    "to find all nodes that are parallel to node.\n",
    "\n",
    "If words do not have letters in common, their similarity is 0, and we do not make an edge."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "# Preparation\n",
    "\n",
    "We pre-compute all letter sets for all words."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def makeSet(w):\n",
    "    return set(F.letters.v(w).lower())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "lines_to_end_of_cell_marker": 2,
    "lines_to_next_cell": 2
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "99 words\n"
     ]
    }
   ],
   "source": [
    "words = {}\n",
    "\n",
    "for w in F.otype.s(\"word\"):\n",
    "    words[w] = makeSet(w)\n",
    "\n",
    "nWords = len(words)\n",
    "print(f\"{nWords} words\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "def sim(wSet, vSet):\n",
    "    return int(round(100 * len(wSet & vSet) / len(wSet | vSet)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "# Compute all similarities\n",
    "\n",
    "We are going to perform all comparisons.\n",
    "\n",
    "Since there are 99 words, this will amount to only 5000 comparisons.\n",
    "\n",
    "For a big corpus, this amount will quickly grow with the number of items to be compared.\n",
    "\n",
    "See for example the similarities in the\n",
    "[Quran](https://nbviewer.jupyter.org/github/q-ran/quran/blob/master/programs/parallels.ipynb)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def computeSim():\n",
    "    similarity = {}\n",
    "\n",
    "    wordNodes = sorted(words.keys())\n",
    "    nWords = len(wordNodes)\n",
    "\n",
    "    nComparisons = nWords * (nWords - 1) // 2\n",
    "\n",
    "    print(f\"{nComparisons} comparisons to make\")\n",
    "\n",
    "    TF.indent(reset=True)\n",
    "\n",
    "    co = 0\n",
    "    si = 0\n",
    "    stop = False\n",
    "    for i in range(nWords):\n",
    "        nodeI = wordNodes[i]\n",
    "        wordI = words[nodeI]\n",
    "        for j in range(i + 1, nWords):\n",
    "            nodeJ = wordNodes[j]\n",
    "            wordJ = words[nodeJ]\n",
    "            s = sim(wordI, wordJ)\n",
    "            co += 1\n",
    "            if s:\n",
    "                similarity[(nodeI, nodeJ)] = sim(wordI, wordJ)\n",
    "                si += 1\n",
    "        if stop:\n",
    "            break\n",
    "\n",
    "    TF.info(f\"{co:>4} comparisons and {si:>4} similarities\")\n",
    "    return similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4851 comparisons to make\n",
      "  0.01s 4851 comparisons and 3332 similarities\n"
     ]
    }
   ],
   "source": [
    "similarity = computeSim()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7\n",
      "100\n"
     ]
    }
   ],
   "source": [
    "print(min(similarity.values()))\n",
    "print(max(similarity.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "eq = [x for x in similarity.items() if x[1] >= 100]\n",
    "neq = [x for x in similarity.items() if x[1] <= 50]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "((1, 4), 100)\n",
      "((1, 2), 8)\n"
     ]
    }
   ],
   "source": [
    "print(eq[0])\n",
    "print(neq[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "58\n",
      "3247\n"
     ]
    }
   ],
   "source": [
    "print(len(eq))\n",
    "print(len(neq))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 Everything\n",
      "4 everything\n"
     ]
    }
   ],
   "source": [
    "print(eq[0][0][0], F.letters.v(eq[0][0][0]))\n",
    "print(eq[0][0][1], F.letters.v(eq[0][0][1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 Everything\n",
      "2 about\n"
     ]
    }
   ],
   "source": [
    "print(neq[0][0][0], F.letters.v(neq[0][0][0]))\n",
    "print(neq[0][0][1], F.letters.v(neq[0][0][1]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Add parallels to the TF dataset\n",
    "\n",
    "We now add this information to the Banks dataset as an *edge feature*."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "GH_BASE = os.path.expanduser(\"~/github\")\n",
    "\n",
    "path = f\"{A.context.org}/{A.context.repo}/sim/tf\"\n",
    "location = f\"{GH_BASE}/{path}\"\n",
    "module = A.context.version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "metaData = {\n",
    "    \"\": {\n",
    "        \"name\": \"Banks (similar words)\",\n",
    "        \"converters\": \"Dirk Roorda\",\n",
    "        \"sourceUrl\": \"https://nbviewer.jupyter.org/github/annotation/tutorials/blob/master/text-fabric/use.ipynb\",\n",
    "        \"version\": \"0.2\",\n",
    "    },\n",
    "    \"sim\": {\n",
    "        \"valueType\": \"int\",\n",
    "        \"edgeValues\": True,\n",
    "        \"description\": \"similarity between words, as a percentage of the common material wrt the combined material\",\n",
    "    },\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "simData = {}\n",
    "for ((f, t), d) in similarity.items():\n",
    "    simData.setdefault(f, {})[t] = d"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  0.00s Exporting 0 node and 1 edge and 0 config features to ~/github/annotation/banks/sim/tf/0.2:\n",
      "   |     0.01s T sim                  to ~/github/annotation/banks/sim/tf/0.2\n",
      "  0.01s Exported 0 node features and 1 edge features and 0 config features to ~/github/annotation/banks/sim/tf/0.2\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "A.api.TF.save(\n",
    "    edgeFeatures=dict(sim=simData), metaData=metaData, location=location, module=module\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "All chapters:\n",
    "\n",
    "* *use*\n",
    "* [share](share.ipynb)\n",
    "* [app](app.ipynb)\n",
    "* [repo](repo.ipynb)\n",
    "* [compose](compose.ipynb)\n",
    "\n",
    "---\n",
    "\n",
    "CC-BY Dirk Roorda"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}