{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "lines_to_next_cell": 2 }, "outputs": [], "source": [ "import os\n", "import pickle\n", "import gzip\n", "\n", "from tf.app import use\n", "from tf.fabric import Fabric" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "ghBase = os.path.expanduser(\"~/github\")\n", "org = \"etcbc\"\n", "repo = \"dss\"\n", "subdir = \"parallels\"\n", "mainpath = f\"{org}/{repo}/tf\"\n", "path = f\"{org}/{repo}/{subdir}/tf\"\n", "location = f\"{ghBase}/{path}\"\n", "mainlocation = f\"{ghBase}/{mainpath}\"\n", "version = \"1.6\"\n", "module = version\n", "tempdir = f\"{ghBase}/{org}/{repo}/_temp\"" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This is Text-Fabric 10.0.3\n", "Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html\n", "\n", "70 features found and 0 ignored\n" ] } ], "source": [ "TF = Fabric(locations=mainlocation, modules=module)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.37s Dataset without structure sections in otext:no structure functions in the T-API\n", " 3.75s All features loaded/computed - for details use TF.isLoaded()\n" ] } ], "source": [ "api = TF.load(\"lex type\")\n", "docs = api.makeAvailableIn(globals())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Parallels\n", "\n", "We make edges between similar lines.\n", "\n", "When are lines similar?\n", "\n", "When a certain similarity measure between them reaches a certain threshold.\n", "\n", "We choose this measure:\n", "\n", "* we reduce a line to the set of lexemes in it.\n", "* the similarity between two lines is the length of the intersection divided by the 
length of the union of their sets times 100."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Preparation\n",
    "\n",
    "We pre-compute all sets for lines.\n",
    "\n",
    "But because not all lines are filled with definite material, we exclude lines with fewer than 5 consonants."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 0.49s 37106 contentful lines out of 52895\n"
     ]
    }
   ],
   "source": [
    "CONS = \"cons\"\n",
    "# a line needs at least this many consonant signs to count as contentful\n",
    "MIN_CONSONANTS = 5\n",
    "\n",
    "allLines = F.otype.s(\"line\")\n",
    "\n",
    "TF.indent(reset=True)\n",
    "valid = {\n",
    "    ln\n",
    "    for ln in allLines\n",
    "    if sum(1 for s in L.d(ln, otype=\"sign\") if F.type.v(s) == CONS) >= MIN_CONSONANTS\n",
    "}\n",
    "\n",
    "TF.info(f\"{len(valid)} contentful lines out of {len(allLines)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def makeSet(ln):\n",
    "    \"\"\"Return the set of lexemes of the words in line node `ln`.\n",
    "\n",
    "    Words whose `lex` value is empty or missing are skipped.\n",
    "    \"\"\"\n",
    "    return {lex for lex in (F.lex.v(w) for w in L.d(ln, otype=\"word\")) if lex}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 0.36s 37106 lines\n"
     ]
    }
   ],
   "source": [
    "lines = {}\n",
    "\n",
    "TF.indent(reset=True)\n",
    "for ln in valid:\n",
    "    lineSet = makeSet(ln)\n",
    "    # keep only the lines that have at least one lexeme\n",
    "    if lineSet:\n",
    "        lines[ln] = lineSet\n",
    "\n",
    "nLines = len(lines)\n",
    "TF.info(f\"{nLines} lines\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "lines_to_next_cell": 2
   },
   "source": [
    "# Measure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sim(lSet, mSet):\n",
    "    \"\"\"Return the similarity of two sets as an integer percentage (0-100).\n",
    "\n",
    "    The similarity is the size of the intersection divided by the size of\n",
    "    the union, times 100, rounded to an integer.\n",
    "    Two empty sets have an empty union; they yield 0 instead of a division by zero.\n",
    "    \"\"\"\n",
    "    union = lSet | mSet\n",
    "    if not union:\n",
    "        return 0\n",
    "    return round(100 * len(lSet & mSet) / len(union))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compute all similarities\n",
    "\n",
    "We are going to perform more than half a billion comparisons, each of which is more than an elementary operation.\n",
    "\n",
    "Let's 
measure time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "THRESHOLD = 60\n",
    "\n",
    "\n",
    "def computeSim(limit=None):\n",
    "    \"\"\"Compare every pair of lines in `lines` and collect the similar pairs.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    limit: int, optional\n",
    "        If given, stop after this many progress chunks (each chunk is one\n",
    "        per mille of all comparisons). Handy for a quick trial run.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    dict\n",
    "        Keyed by pairs `(nodeI, nodeJ)` of line nodes with `nodeI < nodeJ`,\n",
    "        valued by their similarity, for every compared pair whose\n",
    "        similarity is at least `THRESHOLD`.\n",
    "    \"\"\"\n",
    "    similarity = {}\n",
    "\n",
    "    lineNodes = sorted(lines)\n",
    "    nLines = len(lineNodes)\n",
    "    nComparisons = nLines * (nLines - 1) // 2\n",
    "\n",
    "    print(f\"{nComparisons} comparisons to make\")\n",
    "    # at least 1: with a chunk size of 0 a small input would never report\n",
    "    # progress and would never honour the limit\n",
    "    chunkSize = max(1, nComparisons // 1000)\n",
    "\n",
    "    co = 0  # comparisons made so far\n",
    "    b = 0  # comparisons made in the current chunk\n",
    "    si = 0  # similar pairs found so far\n",
    "    p = 0  # chunks completed\n",
    "\n",
    "    TF.indent(reset=True)\n",
    "\n",
    "    stop = False\n",
    "    for i in range(nLines):\n",
    "        nodeI = lineNodes[i]\n",
    "        lineI = lines[nodeI]\n",
    "        for j in range(i + 1, nLines):\n",
    "            nodeJ = lineNodes[j]\n",
    "            s = sim(lineI, lines[nodeJ])\n",
    "            co += 1\n",
    "            b += 1\n",
    "            # store the result before the limit check, so that the pair\n",
    "            # compared on a chunk boundary is not lost when we stop there\n",
    "            if s >= THRESHOLD:\n",
    "                similarity[(nodeI, nodeJ)] = s\n",
    "                si += 1\n",
    "            if b == chunkSize:\n",
    "                p += 1\n",
    "                TF.info(f\"{p:>3}‰ - {co:>12} comparisons and {si:>10} similarities\")\n",
    "                b = 0\n",
    "                if limit is not None and p >= limit:\n",
    "                    stop = True\n",
    "                    break\n",
    "        if stop:\n",
    "            break\n",
    "\n",
    "    TF.info(f\"{p:>3}‰ - {co:>12} comparisons and {si:>10} similarities\")\n",
    "    return similarity"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We first run it for only a few ‰ and then do some checks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "688409065 comparisons to make\n",
      " 0.75s 1‰ - 688409 comparisons and 12 similarities\n",
      " 1.52s 2‰ - 1376818 comparisons and 20 similarities\n",
      " 2.28s 3‰ - 2065227 comparisons and 28 similarities\n",
      " 2.28s 3% - 2065227 comparisons and 28 similarities\n"
     ]
    }
   ],
   "source": [
    "similarity = computeSim(limit=3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We check the sanity of the results." 
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "60\n", "100\n" ] } ], "source": [ "# computeSim keeps only pairs that reach THRESHOLD, and sim never exceeds 100,\n", "# so both extremes must lie within THRESHOLD..100\n", "print(min(similarity.values()))\n", "print(max(similarity.values()))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# each item is ((nodeI, nodeJ), similarity)\n", "# eq: pairs whose similarity is 100; neq: pairs at the low end (similarity at most 70)\n", "eq = [x for x in similarity.items() if x[1] >= 100]\n", "neq = [x for x in similarity.items() if x[1] <= 70]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "9\n" ] } ], "source": [ "print(len(eq))\n", "print(len(neq))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "((1552980, 1563775), 100)\n", "((1552973, 1563769), 69)\n" ] } ], "source": [ "# NOTE(review): indexing with [0] raises IndexError when the trial run found no such pair\n", "print(eq[0])\n", "print(neq[0])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "את ארצ׳ו ולדשן בטוב אדמת׳ו ׃ ויבינו בעונ׳ם וידעו כי \n", "את ארצ׳ו ולדשן בטוב אדמת׳ו ׃ ויבינו בעוונ׳מה וידעו כי \n" ] } ], "source": [ "# the text of both lines of the first pair in eq\n", "print(T.text(eq[0][0][0]))\n", "print(T.text(eq[0][0][1]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Looks good.\n", "\n", "Now the whole computation.\n", "\n", "But if we have done this before, and nothing has changed, we load previous results from disk.\n", "\n", "If we do not find previous results, we compute them and save the results to disk." 
] }, {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "PARA_DIR = f\"{tempdir}/parallels\"\n",
    "\n",
    "\n",
    "def writeResults(data, location, name):\n",
    "    \"\"\"Pickle `data` and write it gzip-compressed to `location/name`.\n",
    "\n",
    "    The directory `location` is created if needed.\n",
    "    The data is first written to a temporary file next to the target and\n",
    "    then renamed, so that an interrupted run does not leave a truncated\n",
    "    results file behind.\n",
    "    \"\"\"\n",
    "    os.makedirs(location, exist_ok=True)\n",
    "    path = f\"{location}/{name}\"\n",
    "    tmpPath = f\"{path}.tmp\"\n",
    "    with gzip.open(tmpPath, \"wb\") as f:\n",
    "        pickle.dump(data, f)\n",
    "    os.replace(tmpPath, path)\n",
    "    TF.info(f\"Data written to {path}\")\n",
    "\n",
    "\n",
    "def readResults(location, name):\n",
    "    \"\"\"Read back the data stored in the gzipped pickle `location/name`.\n",
    "\n",
    "    Returns the unpickled data, or `None` when the file does not exist or\n",
    "    cannot be read, so that the caller can compute the results afresh.\n",
    "\n",
    "    Unpickling can execute arbitrary code: only read files that were\n",
    "    produced by `writeResults` on a machine you trust.\n",
    "    \"\"\"\n",
    "    TF.indent(reset=True)\n",
    "    path = f\"{location}/{name}\"\n",
    "    if not os.path.exists(path):\n",
    "        print(f\"File not found: {path}\")\n",
    "        return None\n",
    "    try:\n",
    "        with gzip.open(path, \"rb\") as f:\n",
    "            data = pickle.load(f)\n",
    "    except (OSError, EOFError, pickle.UnpicklingError) as e:\n",
    "        # a damaged or truncated file is reported and treated as absent\n",
    "        print(f\"File not readable: {path} ({e})\")\n",
    "        return None\n",
    "    TF.info(f\"Data read from {path}\")\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "File not found: C:\\Users\\geitb/github/etcbc/dss/_temp/parallels/sim-1.6.zip\n",
      "688409065 comparisons to make\n",
      " 0.71s 1‰ - 688409 comparisons and 12 similarities\n",
      " 1.44s 2‰ - 1376818 comparisons and 20 similarities\n",
      " 2.20s 3‰ - 2065227 comparisons and 28 similarities\n",
      " 2.95s 4‰ - 2753636 comparisons and 34 similarities\n",
      " 3.70s 5‰ - 3442045 comparisons and 40 similarities\n",
      " 4.45s 6‰ - 4130454 comparisons and 57 similarities\n",
      " 5.19s 7‰ - 4818863 comparisons and 70 similarities\n",
      " 5.92s 8‰ - 5507272 comparisons and 81 similarities\n",
      " 6.66s 9‰ - 6195681 comparisons and 92 similarities\n",
      " 7.40s 10‰ - 6884090 comparisons and 107 similarities\n",
      " 8.13s 11‰ - 7572499 comparisons and 122 similarities\n",
      " 8.86s 12‰ - 8260908 comparisons and 135 similarities\n",
      " 9.59s 13‰ - 8949317 comparisons and 148 similarities\n",
      " 10s 14‰ - 9637726 comparisons and 159 similarities\n",
      " 11s 15‰ - 10326135 comparisons and 169 similarities\n",
      " 12s 16‰ - 11014544 comparisons and 189 similarities\n",
      " 13s 17‰ - 11702953 comparisons and 200 similarities\n",
      " 13s 18‰ - 
12391362 comparisons and 240 similarities\n", " 14s 19‰ - 13079771 comparisons and 242 similarities\n", " 15s 20‰ - 13768180 comparisons and 244 similarities\n", " 16s 21‰ - 14456589 comparisons and 245 similarities\n", " 16s 22‰ - 15144998 comparisons and 249 similarities\n", " 17s 23‰ - 15833407 comparisons and 255 similarities\n", " 18s 24‰ - 16521816 comparisons and 266 similarities\n", " 19s 25‰ - 17210225 comparisons and 272 similarities\n", " 19s 26‰ - 17898634 comparisons and 280 similarities\n", " 20s 27‰ - 18587043 comparisons and 287 similarities\n", " 21s 28‰ - 19275452 comparisons and 290 similarities\n", " 22s 29‰ - 19963861 comparisons and 291 similarities\n", " 23s 30‰ - 20652270 comparisons and 297 similarities\n", " 23s 31‰ - 21340679 comparisons and 304 similarities\n", " 24s 32‰ - 22029088 comparisons and 319 similarities\n", " 25s 33‰ - 22717497 comparisons and 331 similarities\n", " 26s 34‰ - 23405906 comparisons and 344 similarities\n", " 26s 35‰ - 24094315 comparisons and 370 similarities\n", " 27s 36‰ - 24782724 comparisons and 382 similarities\n", " 28s 37‰ - 25471133 comparisons and 386 similarities\n", " 29s 38‰ - 26159542 comparisons and 402 similarities\n", " 29s 39‰ - 26847951 comparisons and 410 similarities\n", " 30s 40‰ - 27536360 comparisons and 429 similarities\n", " 31s 41‰ - 28224769 comparisons and 556 similarities\n", " 31s 42‰ - 28913178 comparisons and 630 similarities\n", " 32s 43‰ - 29601587 comparisons and 657 similarities\n", " 33s 44‰ - 30289996 comparisons and 797 similarities\n", " 34s 45‰ - 30978405 comparisons and 801 similarities\n", " 34s 46‰ - 31666814 comparisons and 802 similarities\n", " 35s 47‰ - 32355223 comparisons and 807 similarities\n", " 36s 48‰ - 33043632 comparisons and 807 similarities\n", " 36s 49‰ - 33732041 comparisons and 807 similarities\n", " 37s 50‰ - 34420450 comparisons and 807 similarities\n", " 38s 51‰ - 35108859 comparisons and 816 similarities\n", " 38s 52‰ - 35797268 comparisons and 
821 similarities\n", " 39s 53‰ - 36485677 comparisons and 826 similarities\n", " 40s 54‰ - 37174086 comparisons and 827 similarities\n", " 41s 55‰ - 37862495 comparisons and 828 similarities\n", " 41s 56‰ - 38550904 comparisons and 846 similarities\n", " 42s 57‰ - 39239313 comparisons and 847 similarities\n", " 43s 58‰ - 39927722 comparisons and 847 similarities\n", " 44s 59‰ - 40616131 comparisons and 847 similarities\n", " 44s 60‰ - 41304540 comparisons and 849 similarities\n", " 45s 61‰ - 41992949 comparisons and 851 similarities\n", " 46s 62‰ - 42681358 comparisons and 856 similarities\n", " 47s 63‰ - 43369767 comparisons and 859 similarities\n", " 47s 64‰ - 44058176 comparisons and 860 similarities\n", " 48s 65‰ - 44746585 comparisons and 861 similarities\n", " 49s 66‰ - 45434994 comparisons and 874 similarities\n", " 50s 67‰ - 46123403 comparisons and 878 similarities\n", " 50s 68‰ - 46811812 comparisons and 883 similarities\n", " 51s 69‰ - 47500221 comparisons and 960 similarities\n", " 52s 70‰ - 48188630 comparisons and 965 similarities\n", " 53s 71‰ - 48877039 comparisons and 972 similarities\n", " 53s 72‰ - 49565448 comparisons and 1005 similarities\n", " 54s 73‰ - 50253857 comparisons and 1028 similarities\n", " 55s 74‰ - 50942266 comparisons and 1030 similarities\n", " 55s 75‰ - 51630675 comparisons and 1035 similarities\n", " 56s 76‰ - 52319084 comparisons and 1155 similarities\n", " 57s 77‰ - 53007493 comparisons and 1155 similarities\n", " 58s 78‰ - 53695902 comparisons and 1163 similarities\n", " 58s 79‰ - 54384311 comparisons and 1380 similarities\n", " 59s 80‰ - 55072720 comparisons and 1380 similarities\n", " 1m 00s 81‰ - 55761129 comparisons and 1387 similarities\n", " 1m 01s 82‰ - 56449538 comparisons and 1388 similarities\n", " 1m 01s 83‰ - 57137947 comparisons and 1494 similarities\n", " 1m 02s 84‰ - 57826356 comparisons and 1496 similarities\n", " 1m 03s 85‰ - 58514765 comparisons and 1817 similarities\n", " 1m 04s 86‰ - 59203174 comparisons 
and 1821 similarities\n", " 1m 04s 87‰ - 59891583 comparisons and 1830 similarities\n", " 1m 05s 88‰ - 60579992 comparisons and 1832 similarities\n", " 1m 06s 89‰ - 61268401 comparisons and 1835 similarities\n", " 1m 07s 90‰ - 61956810 comparisons and 1845 similarities\n", " 1m 07s 91‰ - 62645219 comparisons and 1851 similarities\n", " 1m 08s 92‰ - 63333628 comparisons and 1857 similarities\n", " 1m 09s 93‰ - 64022037 comparisons and 1857 similarities\n", " 1m 10s 94‰ - 64710446 comparisons and 1865 similarities\n", " 1m 10s 95‰ - 65398855 comparisons and 1870 similarities\n", " 1m 11s 96‰ - 66087264 comparisons and 1870 similarities\n", " 1m 12s 97‰ - 66775673 comparisons and 1875 similarities\n", " 1m 13s 98‰ - 67464082 comparisons and 1877 similarities\n", " 1m 13s 99‰ - 68152491 comparisons and 1880 similarities\n", " 1m 14s 100‰ - 68840900 comparisons and 1883 similarities\n", " 1m 15s 101‰ - 69529309 comparisons and 1885 similarities\n", " 1m 15s 102‰ - 70217718 comparisons and 1898 similarities\n", " 1m 16s 103‰ - 70906127 comparisons and 1907 similarities\n", " 1m 17s 104‰ - 71594536 comparisons and 2019 similarities\n", " 1m 18s 105‰ - 72282945 comparisons and 2042 similarities\n", " 1m 18s 106‰ - 72971354 comparisons and 2045 similarities\n", " 1m 19s 107‰ - 73659763 comparisons and 2063 similarities\n", " 1m 20s 108‰ - 74348172 comparisons and 2095 similarities\n", " 1m 20s 109‰ - 75036581 comparisons and 2109 similarities\n", " 1m 21s 110‰ - 75724990 comparisons and 2126 similarities\n", " 1m 22s 111‰ - 76413399 comparisons and 2165 similarities\n", " 1m 22s 112‰ - 77101808 comparisons and 2327 similarities\n", " 1m 23s 113‰ - 77790217 comparisons and 2346 similarities\n", " 1m 24s 114‰ - 78478626 comparisons and 2452 similarities\n", " 1m 24s 115‰ - 79167035 comparisons and 2653 similarities\n", " 1m 25s 116‰ - 79855444 comparisons and 2994 similarities\n", " 1m 26s 117‰ - 80543853 comparisons and 3105 similarities\n", " 1m 26s 118‰ - 81232262 
comparisons and 3271 similarities\n", " 1m 27s 119‰ - 81920671 comparisons and 3381 similarities\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 1m 28s 120‰ - 82609080 comparisons and 3424 similarities\n", " 1m 28s 121‰ - 83297489 comparisons and 3528 similarities\n", " 1m 29s 122‰ - 83985898 comparisons and 3612 similarities\n", " 1m 30s 123‰ - 84674307 comparisons and 3612 similarities\n", " 1m 30s 124‰ - 85362716 comparisons and 3629 similarities\n", " 1m 31s 125‰ - 86051125 comparisons and 3659 similarities\n", " 1m 32s 126‰ - 86739534 comparisons and 3660 similarities\n", " 1m 32s 127‰ - 87427943 comparisons and 3663 similarities\n", " 1m 33s 128‰ - 88116352 comparisons and 3689 similarities\n", " 1m 34s 129‰ - 88804761 comparisons and 3764 similarities\n", " 1m 35s 130‰ - 89493170 comparisons and 3772 similarities\n", " 1m 35s 131‰ - 90181579 comparisons and 3792 similarities\n", " 1m 36s 132‰ - 90869988 comparisons and 3811 similarities\n", " 1m 37s 133‰ - 91558397 comparisons and 3811 similarities\n", " 1m 37s 134‰ - 92246806 comparisons and 3811 similarities\n", " 1m 38s 135‰ - 92935215 comparisons and 3811 similarities\n", " 1m 39s 136‰ - 93623624 comparisons and 3811 similarities\n", " 1m 40s 137‰ - 94312033 comparisons and 3811 similarities\n", " 1m 41s 138‰ - 95000442 comparisons and 3811 similarities\n", " 1m 41s 139‰ - 95688851 comparisons and 3813 similarities\n", " 1m 42s 140‰ - 96377260 comparisons and 3868 similarities\n", " 1m 42s 141‰ - 97065669 comparisons and 4117 similarities\n", " 1m 43s 142‰ - 97754078 comparisons and 4117 similarities\n", " 1m 44s 143‰ - 98442487 comparisons and 4121 similarities\n", " 1m 45s 144‰ - 99130896 comparisons and 4190 similarities\n", " 1m 45s 145‰ - 99819305 comparisons and 4364 similarities\n", " 1m 46s 146‰ - 100507714 comparisons and 4538 similarities\n", " 1m 46s 147‰ - 101196123 comparisons and 4819 similarities\n", " 1m 47s 148‰ - 101884532 comparisons and 4867 similarities\n", " 1m 48s 
149‰ - 102572941 comparisons and 4945 similarities\n", " 1m 48s 150‰ - 103261350 comparisons and 4964 similarities\n", " 1m 49s 151‰ - 103949759 comparisons and 5145 similarities\n", " 1m 50s 152‰ - 104638168 comparisons and 5233 similarities\n", " 1m 50s 153‰ - 105326577 comparisons and 5261 similarities\n", " 1m 51s 154‰ - 106014986 comparisons and 5342 similarities\n", " 1m 52s 155‰ - 106703395 comparisons and 5463 similarities\n", " 1m 52s 156‰ - 107391804 comparisons and 5608 similarities\n", " 1m 53s 157‰ - 108080213 comparisons and 5734 similarities\n", " 1m 53s 158‰ - 108768622 comparisons and 6005 similarities\n", " 1m 54s 159‰ - 109457031 comparisons and 6220 similarities\n", " 1m 55s 160‰ - 110145440 comparisons and 6547 similarities\n", " 1m 55s 161‰ - 110833849 comparisons and 6555 similarities\n", " 1m 56s 162‰ - 111522258 comparisons and 6587 similarities\n", " 1m 57s 163‰ - 112210667 comparisons and 6653 similarities\n", " 1m 57s 164‰ - 112899076 comparisons and 6689 similarities\n", " 1m 58s 165‰ - 113587485 comparisons and 6724 similarities\n", " 1m 59s 166‰ - 114275894 comparisons and 6900 similarities\n", " 1m 59s 167‰ - 114964303 comparisons and 6984 similarities\n", " 2m 00s 168‰ - 115652712 comparisons and 7102 similarities\n", " 2m 00s 169‰ - 116341121 comparisons and 7234 similarities\n", " 2m 01s 170‰ - 117029530 comparisons and 7234 similarities\n", " 2m 02s 171‰ - 117717939 comparisons and 7235 similarities\n", " 2m 02s 172‰ - 118406348 comparisons and 7237 similarities\n", " 2m 03s 173‰ - 119094757 comparisons and 7246 similarities\n", " 2m 04s 174‰ - 119783166 comparisons and 7246 similarities\n", " 2m 04s 175‰ - 120471575 comparisons and 7246 similarities\n", " 2m 05s 176‰ - 121159984 comparisons and 7246 similarities\n", " 2m 05s 177‰ - 121848393 comparisons and 7247 similarities\n", " 2m 06s 178‰ - 122536802 comparisons and 7251 similarities\n", " 2m 07s 179‰ - 123225211 comparisons and 7251 similarities\n", " 2m 07s 180‰ - 
123913620 comparisons and 7372 similarities\n", " 2m 08s 181‰ - 124602029 comparisons and 7383 similarities\n", " 2m 09s 182‰ - 125290438 comparisons and 7458 similarities\n", " 2m 09s 183‰ - 125978847 comparisons and 7488 similarities\n", " 2m 10s 184‰ - 126667256 comparisons and 7514 similarities\n", " 2m 11s 185‰ - 127355665 comparisons and 7551 similarities\n", " 2m 11s 186‰ - 128044074 comparisons and 7589 similarities\n", " 2m 12s 187‰ - 128732483 comparisons and 7593 similarities\n", " 2m 13s 188‰ - 129420892 comparisons and 7674 similarities\n", " 2m 13s 189‰ - 130109301 comparisons and 7681 similarities\n", " 2m 14s 190‰ - 130797710 comparisons and 7689 similarities\n", " 2m 15s 191‰ - 131486119 comparisons and 7694 similarities\n", " 2m 15s 192‰ - 132174528 comparisons and 7719 similarities\n", " 2m 16s 193‰ - 132862937 comparisons and 7840 similarities\n", " 2m 17s 194‰ - 133551346 comparisons and 7856 similarities\n", " 2m 17s 195‰ - 134239755 comparisons and 7988 similarities\n", " 2m 18s 196‰ - 134928164 comparisons and 8051 similarities\n", " 2m 19s 197‰ - 135616573 comparisons and 8143 similarities\n", " 2m 20s 198‰ - 136304982 comparisons and 8172 similarities\n", " 2m 20s 199‰ - 136993391 comparisons and 8224 similarities\n", " 2m 21s 200‰ - 137681800 comparisons and 8252 similarities\n", " 2m 21s 201‰ - 138370209 comparisons and 8298 similarities\n", " 2m 22s 202‰ - 139058618 comparisons and 8306 similarities\n", " 2m 23s 203‰ - 139747027 comparisons and 8340 similarities\n", " 2m 23s 204‰ - 140435436 comparisons and 8435 similarities\n", " 2m 24s 205‰ - 141123845 comparisons and 8564 similarities\n", " 2m 25s 206‰ - 141812254 comparisons and 8567 similarities\n", " 2m 26s 207‰ - 142500663 comparisons and 8571 similarities\n", " 2m 26s 208‰ - 143189072 comparisons and 8586 similarities\n", " 2m 27s 209‰ - 143877481 comparisons and 8596 similarities\n", " 2m 28s 210‰ - 144565890 comparisons and 8600 similarities\n", " 2m 28s 211‰ - 145254299 
comparisons and 8609 similarities\n", " 2m 29s 212‰ - 145942708 comparisons and 8634 similarities\n", " 2m 30s 213‰ - 146631117 comparisons and 8658 similarities\n", " 2m 30s 214‰ - 147319526 comparisons and 8666 similarities\n", " 2m 31s 215‰ - 148007935 comparisons and 8692 similarities\n", " 2m 32s 216‰ - 148696344 comparisons and 8821 similarities\n", " 2m 33s 217‰ - 149384753 comparisons and 8848 similarities\n", " 2m 33s 218‰ - 150073162 comparisons and 8871 similarities\n", " 2m 34s 219‰ - 150761571 comparisons and 8888 similarities\n", " 2m 35s 220‰ - 151449980 comparisons and 8934 similarities\n", " 2m 35s 221‰ - 152138389 comparisons and 9021 similarities\n", " 2m 36s 222‰ - 152826798 comparisons and 9099 similarities\n", " 2m 37s 223‰ - 153515207 comparisons and 9099 similarities\n", " 2m 37s 224‰ - 154203616 comparisons and 9109 similarities\n", " 2m 38s 225‰ - 154892025 comparisons and 9127 similarities\n", " 2m 39s 226‰ - 155580434 comparisons and 9212 similarities\n", " 2m 39s 227‰ - 156268843 comparisons and 9408 similarities\n", " 2m 40s 228‰ - 156957252 comparisons and 9493 similarities\n", " 2m 41s 229‰ - 157645661 comparisons and 9502 similarities\n", " 2m 41s 230‰ - 158334070 comparisons and 9529 similarities\n", " 2m 42s 231‰ - 159022479 comparisons and 9571 similarities\n", " 2m 43s 232‰ - 159710888 comparisons and 9598 similarities\n", " 2m 43s 233‰ - 160399297 comparisons and 9656 similarities\n", " 2m 44s 234‰ - 161087706 comparisons and 9683 similarities\n", " 2m 45s 235‰ - 161776115 comparisons and 9712 similarities\n", " 2m 45s 236‰ - 162464524 comparisons and 9955 similarities\n", " 2m 46s 237‰ - 163152933 comparisons and 9961 similarities\n", " 2m 47s 238‰ - 163841342 comparisons and 9968 similarities\n", " 2m 47s 239‰ - 164529751 comparisons and 10001 similarities\n", " 2m 48s 240‰ - 165218160 comparisons and 10093 similarities\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 2m 49s 241‰ - 165906569 comparisons and 
10095 similarities\n", " 2m 49s 242‰ - 166594978 comparisons and 10171 similarities\n", " 2m 50s 243‰ - 167283387 comparisons and 10426 similarities\n", " 2m 51s 244‰ - 167971796 comparisons and 10426 similarities\n", " 2m 51s 245‰ - 168660205 comparisons and 10432 similarities\n", " 2m 52s 246‰ - 169348614 comparisons and 10518 similarities\n", " 2m 53s 247‰ - 170037023 comparisons and 10537 similarities\n", " 2m 53s 248‰ - 170725432 comparisons and 10570 similarities\n", " 2m 54s 249‰ - 171413841 comparisons and 10637 similarities\n", " 2m 55s 250‰ - 172102250 comparisons and 10644 similarities\n", " 2m 56s 251‰ - 172790659 comparisons and 10653 similarities\n", " 2m 56s 252‰ - 173479068 comparisons and 10660 similarities\n", " 2m 57s 253‰ - 174167477 comparisons and 10667 similarities\n", " 2m 58s 254‰ - 174855886 comparisons and 10667 similarities\n", " 2m 58s 255‰ - 175544295 comparisons and 10687 similarities\n", " 2m 59s 256‰ - 176232704 comparisons and 10721 similarities\n", " 3m 00s 257‰ - 176921113 comparisons and 10770 similarities\n", " 3m 00s 258‰ - 177609522 comparisons and 10773 similarities\n", " 3m 01s 259‰ - 178297931 comparisons and 10776 similarities\n", " 3m 02s 260‰ - 178986340 comparisons and 10776 similarities\n", " 3m 03s 261‰ - 179674749 comparisons and 10804 similarities\n", " 3m 03s 262‰ - 180363158 comparisons and 10804 similarities\n", " 3m 04s 263‰ - 181051567 comparisons and 10804 similarities\n", " 3m 05s 264‰ - 181739976 comparisons and 10815 similarities\n", " 3m 06s 265‰ - 182428385 comparisons and 10815 similarities\n", " 3m 06s 266‰ - 183116794 comparisons and 10980 similarities\n", " 3m 07s 267‰ - 183805203 comparisons and 10983 similarities\n", " 3m 08s 268‰ - 184493612 comparisons and 11057 similarities\n", " 3m 08s 269‰ - 185182021 comparisons and 11169 similarities\n", " 3m 09s 270‰ - 185870430 comparisons and 11268 similarities\n", " 3m 10s 271‰ - 186558839 comparisons and 11403 similarities\n", " 3m 10s 272‰ - 187247248 
comparisons and 11517 similarities\n", " 3m 11s 273‰ - 187935657 comparisons and 11723 similarities\n", " 3m 12s 274‰ - 188624066 comparisons and 11868 similarities\n", " 3m 12s 275‰ - 189312475 comparisons and 11939 similarities\n", " 3m 13s 276‰ - 190000884 comparisons and 11972 similarities\n", " 3m 14s 277‰ - 190689293 comparisons and 12002 similarities\n", " 3m 15s 278‰ - 191377702 comparisons and 12015 similarities\n", " 3m 15s 279‰ - 192066111 comparisons and 12042 similarities\n", " 3m 16s 280‰ - 192754520 comparisons and 12045 similarities\n", " 3m 17s 281‰ - 193442929 comparisons and 12054 similarities\n", " 3m 17s 282‰ - 194131338 comparisons and 12055 similarities\n", " 3m 18s 283‰ - 194819747 comparisons and 12056 similarities\n", " 3m 19s 284‰ - 195508156 comparisons and 12061 similarities\n", " 3m 19s 285‰ - 196196565 comparisons and 12071 similarities\n", " 3m 20s 286‰ - 196884974 comparisons and 12107 similarities\n", " 3m 21s 287‰ - 197573383 comparisons and 12120 similarities\n", " 3m 21s 288‰ - 198261792 comparisons and 12126 similarities\n", " 3m 22s 289‰ - 198950201 comparisons and 12153 similarities\n", " 3m 23s 290‰ - 199638610 comparisons and 12154 similarities\n", " 3m 24s 291‰ - 200327019 comparisons and 12175 similarities\n", " 3m 24s 292‰ - 201015428 comparisons and 12175 similarities\n", " 3m 25s 293‰ - 201703837 comparisons and 12176 similarities\n", " 3m 26s 294‰ - 202392246 comparisons and 12180 similarities\n", " 3m 26s 295‰ - 203080655 comparisons and 12237 similarities\n", " 3m 27s 296‰ - 203769064 comparisons and 12318 similarities\n", " 3m 28s 297‰ - 204457473 comparisons and 12324 similarities\n", " 3m 29s 298‰ - 205145882 comparisons and 12346 similarities\n", " 3m 29s 299‰ - 205834291 comparisons and 12346 similarities\n", " 3m 30s 300‰ - 206522700 comparisons and 12425 similarities\n", " 3m 31s 301‰ - 207211109 comparisons and 12499 similarities\n", " 3m 32s 302‰ - 207899518 comparisons and 12499 similarities\n", " 3m 32s 
303‰ - 208587927 comparisons and 12499 similarities\n", " 3m 33s 304‰ - 209276336 comparisons and 12501 similarities\n", " 3m 34s 305‰ - 209964745 comparisons and 12501 similarities\n", " 3m 35s 306‰ - 210653154 comparisons and 12505 similarities\n", " 3m 35s 307‰ - 211341563 comparisons and 12511 similarities\n", " 3m 36s 308‰ - 212029972 comparisons and 12525 similarities\n", " 3m 37s 309‰ - 212718381 comparisons and 12531 similarities\n", " 3m 37s 310‰ - 213406790 comparisons and 12592 similarities\n", " 3m 38s 311‰ - 214095199 comparisons and 12700 similarities\n", " 3m 39s 312‰ - 214783608 comparisons and 12728 similarities\n", " 3m 39s 313‰ - 215472017 comparisons and 12770 similarities\n", " 3m 40s 314‰ - 216160426 comparisons and 12798 similarities\n", " 3m 41s 315‰ - 216848835 comparisons and 12860 similarities\n", " 3m 41s 316‰ - 217537244 comparisons and 12883 similarities\n", " 3m 42s 317‰ - 218225653 comparisons and 13064 similarities\n", " 3m 42s 318‰ - 218914062 comparisons and 13171 similarities\n", " 3m 43s 319‰ - 219602471 comparisons and 13178 similarities\n", " 3m 44s 320‰ - 220290880 comparisons and 13187 similarities\n", " 3m 45s 321‰ - 220979289 comparisons and 13255 similarities\n", " 3m 45s 322‰ - 221667698 comparisons and 13259 similarities\n", " 3m 46s 323‰ - 222356107 comparisons and 13276 similarities\n", " 3m 47s 324‰ - 223044516 comparisons and 13288 similarities\n", " 3m 47s 325‰ - 223732925 comparisons and 13358 similarities\n", " 3m 48s 326‰ - 224421334 comparisons and 13530 similarities\n", " 3m 49s 327‰ - 225109743 comparisons and 13554 similarities\n", " 3m 49s 328‰ - 225798152 comparisons and 13580 similarities\n", " 3m 50s 329‰ - 226486561 comparisons and 13622 similarities\n", " 3m 51s 330‰ - 227174970 comparisons and 13623 similarities\n", " 3m 52s 331‰ - 227863379 comparisons and 13637 similarities\n", " 3m 52s 332‰ - 228551788 comparisons and 13759 similarities\n", " 3m 53s 333‰ - 229240197 comparisons and 13840 
similarities\n", " 3m 54s 334‰ - 229928606 comparisons and 14040 similarities\n", " 3m 54s 335‰ - 230617015 comparisons and 14084 similarities\n", " 3m 55s 336‰ - 231305424 comparisons and 14105 similarities\n", " 3m 56s 337‰ - 231993833 comparisons and 14123 similarities\n", " 3m 56s 338‰ - 232682242 comparisons and 14153 similarities\n", " 3m 57s 339‰ - 233370651 comparisons and 14191 similarities\n", " 3m 58s 340‰ - 234059060 comparisons and 14207 similarities\n", " 3m 59s 341‰ - 234747469 comparisons and 14216 similarities\n", " 3m 59s 342‰ - 235435878 comparisons and 14228 similarities\n", " 4m 00s 343‰ - 236124287 comparisons and 14233 similarities\n", " 4m 01s 344‰ - 236812696 comparisons and 14245 similarities\n", " 4m 02s 345‰ - 237501105 comparisons and 14245 similarities\n", " 4m 02s 346‰ - 238189514 comparisons and 14247 similarities\n", " 4m 03s 347‰ - 238877923 comparisons and 14253 similarities\n", " 4m 04s 348‰ - 239566332 comparisons and 14266 similarities\n", " 4m 04s 349‰ - 240254741 comparisons and 14272 similarities\n", " 4m 05s 350‰ - 240943150 comparisons and 14339 similarities\n", " 4m 06s 351‰ - 241631559 comparisons and 14367 similarities\n", " 4m 07s 352‰ - 242319968 comparisons and 14484 similarities\n", " 4m 07s 353‰ - 243008377 comparisons and 14510 similarities\n", " 4m 08s 354‰ - 243696786 comparisons and 14542 similarities\n", " 4m 09s 355‰ - 244385195 comparisons and 14548 similarities\n", " 4m 09s 356‰ - 245073604 comparisons and 14607 similarities\n", " 4m 10s 357‰ - 245762013 comparisons and 14627 similarities\n", " 4m 11s 358‰ - 246450422 comparisons and 14632 similarities\n", " 4m 12s 359‰ - 247138831 comparisons and 14655 similarities\n", " 4m 12s 360‰ - 247827240 comparisons and 14677 similarities\n", " 4m 13s 361‰ - 248515649 comparisons and 14700 similarities\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 4m 14s 362‰ - 249204058 comparisons and 14757 similarities\n", " 4m 14s 363‰ - 249892467 comparisons 
and 14794 similarities\n", " 4m 15s 364‰ - 250580876 comparisons and 14802 similarities\n", " 4m 16s 365‰ - 251269285 comparisons and 14813 similarities\n", " 4m 16s 366‰ - 251957694 comparisons and 14824 similarities\n", " 4m 17s 367‰ - 252646103 comparisons and 14834 similarities\n", " 4m 18s 368‰ - 253334512 comparisons and 14853 similarities\n", " 4m 18s 369‰ - 254022921 comparisons and 15021 similarities\n", " 4m 19s 370‰ - 254711330 comparisons and 15152 similarities\n", " 4m 20s 371‰ - 255399739 comparisons and 15165 similarities\n", " 4m 21s 372‰ - 256088148 comparisons and 15223 similarities\n", " 4m 21s 373‰ - 256776557 comparisons and 15322 similarities\n", " 4m 22s 374‰ - 257464966 comparisons and 15323 similarities\n", " 4m 23s 375‰ - 258153375 comparisons and 15408 similarities\n", " 4m 23s 376‰ - 258841784 comparisons and 15414 similarities\n", " 4m 24s 377‰ - 259530193 comparisons and 15439 similarities\n", " 4m 25s 378‰ - 260218602 comparisons and 15453 similarities\n", " 4m 26s 379‰ - 260907011 comparisons and 15455 similarities\n", " 4m 26s 380‰ - 261595420 comparisons and 15611 similarities\n", " 4m 27s 381‰ - 262283829 comparisons and 15632 similarities\n", " 4m 28s 382‰ - 262972238 comparisons and 15639 similarities\n", " 4m 28s 383‰ - 263660647 comparisons and 15659 similarities\n", " 4m 29s 384‰ - 264349056 comparisons and 15662 similarities\n", " 4m 30s 385‰ - 265037465 comparisons and 15665 similarities\n", " 4m 31s 386‰ - 265725874 comparisons and 15673 similarities\n", " 4m 31s 387‰ - 266414283 comparisons and 15696 similarities\n", " 4m 32s 388‰ - 267102692 comparisons and 15699 similarities\n", " 4m 33s 389‰ - 267791101 comparisons and 15712 similarities\n", " 4m 34s 390‰ - 268479510 comparisons and 15714 similarities\n", " 4m 34s 391‰ - 269167919 comparisons and 15719 similarities\n", " 4m 35s 392‰ - 269856328 comparisons and 15796 similarities\n", " 4m 36s 393‰ - 270544737 comparisons and 15808 similarities\n", " 4m 36s 394‰ - 
271233146 comparisons and 15836 similarities\n", " 4m 37s 395‰ - 271921555 comparisons and 15957 similarities\n", " 4m 38s 396‰ - 272609964 comparisons and 15971 similarities\n", " 4m 38s 397‰ - 273298373 comparisons and 16063 similarities\n", " 4m 39s 398‰ - 273986782 comparisons and 16134 similarities\n", " 4m 40s 399‰ - 274675191 comparisons and 16315 similarities\n", " 4m 40s 400‰ - 275363600 comparisons and 16367 similarities\n", " 4m 41s 401‰ - 276052009 comparisons and 16494 similarities\n", " 4m 41s 402‰ - 276740418 comparisons and 16520 similarities\n", " 4m 42s 403‰ - 277428827 comparisons and 16555 similarities\n", " 4m 43s 404‰ - 278117236 comparisons and 16656 similarities\n", " 4m 43s 405‰ - 278805645 comparisons and 16677 similarities\n", " 4m 44s 406‰ - 279494054 comparisons and 16739 similarities\n", " 4m 45s 407‰ - 280182463 comparisons and 17006 similarities\n", " 4m 45s 408‰ - 280870872 comparisons and 17133 similarities\n", " 4m 46s 409‰ - 281559281 comparisons and 17211 similarities\n", " 4m 47s 410‰ - 282247690 comparisons and 17228 similarities\n", " 4m 47s 411‰ - 282936099 comparisons and 17283 similarities\n", " 4m 48s 412‰ - 283624508 comparisons and 17430 similarities\n", " 4m 49s 413‰ - 284312917 comparisons and 17512 similarities\n", " 4m 49s 414‰ - 285001326 comparisons and 17588 similarities\n", " 4m 50s 415‰ - 285689735 comparisons and 17661 similarities\n", " 4m 51s 416‰ - 286378144 comparisons and 17701 similarities\n", " 4m 52s 417‰ - 287066553 comparisons and 17750 similarities\n", " 4m 52s 418‰ - 287754962 comparisons and 17879 similarities\n", " 4m 53s 419‰ - 288443371 comparisons and 18034 similarities\n", " 4m 53s 420‰ - 289131780 comparisons and 18223 similarities\n", " 4m 54s 421‰ - 289820189 comparisons and 18429 similarities\n", " 4m 55s 422‰ - 290508598 comparisons and 18583 similarities\n", " 4m 55s 423‰ - 291197007 comparisons and 18649 similarities\n", " 4m 56s 424‰ - 291885416 comparisons and 18838 similarities\n", 
" 4m 57s 425‰ - 292573825 comparisons and 18857 similarities\n", " 4m 57s 426‰ - 293262234 comparisons and 18887 similarities\n", " 4m 58s 427‰ - 293950643 comparisons and 18982 similarities\n", " 4m 59s 428‰ - 294639052 comparisons and 19218 similarities\n", " 4m 59s 429‰ - 295327461 comparisons and 19594 similarities\n", " 5m 00s 430‰ - 296015870 comparisons and 19616 similarities\n", " 5m 00s 431‰ - 296704279 comparisons and 19700 similarities\n", " 5m 01s 432‰ - 297392688 comparisons and 20306 similarities\n", " 5m 02s 433‰ - 298081097 comparisons and 20455 similarities\n", " 5m 03s 434‰ - 298769506 comparisons and 20716 similarities\n", " 5m 03s 435‰ - 299457915 comparisons and 20936 similarities\n", " 5m 04s 436‰ - 300146324 comparisons and 21114 similarities\n", " 5m 04s 437‰ - 300834733 comparisons and 21312 similarities\n", " 5m 05s 438‰ - 301523142 comparisons and 21491 similarities\n", " 5m 06s 439‰ - 302211551 comparisons and 21503 similarities\n", " 5m 06s 440‰ - 302899960 comparisons and 21686 similarities\n", " 5m 07s 441‰ - 303588369 comparisons and 21721 similarities\n", " 5m 08s 442‰ - 304276778 comparisons and 21776 similarities\n", " 5m 08s 443‰ - 304965187 comparisons and 21878 similarities\n", " 5m 09s 444‰ - 305653596 comparisons and 21905 similarities\n", " 5m 10s 445‰ - 306342005 comparisons and 21922 similarities\n", " 5m 10s 446‰ - 307030414 comparisons and 21976 similarities\n", " 5m 11s 447‰ - 307718823 comparisons and 22019 similarities\n", " 5m 12s 448‰ - 308407232 comparisons and 22051 similarities\n", " 5m 12s 449‰ - 309095641 comparisons and 22075 similarities\n", " 5m 13s 450‰ - 309784050 comparisons and 22078 similarities\n", " 5m 14s 451‰ - 310472459 comparisons and 22095 similarities\n", " 5m 15s 452‰ - 311160868 comparisons and 22100 similarities\n", " 5m 15s 453‰ - 311849277 comparisons and 22107 similarities\n", " 5m 16s 454‰ - 312537686 comparisons and 22148 similarities\n", " 5m 16s 455‰ - 313226095 comparisons and 22238 
similarities\n", " 5m 17s 456‰ - 313914504 comparisons and 22241 similarities\n", " 5m 18s 457‰ - 314602913 comparisons and 22247 similarities\n", " 5m 19s 458‰ - 315291322 comparisons and 22278 similarities\n", " 5m 19s 459‰ - 315979731 comparisons and 22381 similarities\n", " 5m 20s 460‰ - 316668140 comparisons and 22381 similarities\n", " 5m 20s 461‰ - 317356549 comparisons and 22395 similarities\n", " 5m 21s 462‰ - 318044958 comparisons and 22488 similarities\n", " 5m 22s 463‰ - 318733367 comparisons and 22511 similarities\n", " 5m 22s 464‰ - 319421776 comparisons and 22527 similarities\n", " 5m 23s 465‰ - 320110185 comparisons and 22527 similarities\n", " 5m 24s 466‰ - 320798594 comparisons and 22532 similarities\n", " 5m 25s 467‰ - 321487003 comparisons and 22546 similarities\n", " 5m 25s 468‰ - 322175412 comparisons and 22597 similarities\n", " 5m 26s 469‰ - 322863821 comparisons and 22608 similarities\n", " 5m 27s 470‰ - 323552230 comparisons and 22617 similarities\n", " 5m 28s 471‰ - 324240639 comparisons and 22628 similarities\n", " 5m 28s 472‰ - 324929048 comparisons and 22650 similarities\n", " 5m 29s 473‰ - 325617457 comparisons and 22692 similarities\n", " 5m 30s 474‰ - 326305866 comparisons and 22790 similarities\n", " 5m 30s 475‰ - 326994275 comparisons and 23003 similarities\n", " 5m 31s 476‰ - 327682684 comparisons and 23024 similarities\n", " 5m 32s 477‰ - 328371093 comparisons and 23046 similarities\n", " 5m 33s 478‰ - 329059502 comparisons and 23072 similarities\n", " 5m 33s 479‰ - 329747911 comparisons and 23109 similarities\n", " 5m 34s 480‰ - 330436320 comparisons and 23131 similarities\n", " 5m 35s 481‰ - 331124729 comparisons and 23142 similarities\n", " 5m 36s 482‰ - 331813138 comparisons and 23160 similarities\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 5m 36s 483‰ - 332501547 comparisons and 23171 similarities\n", " 5m 37s 484‰ - 333189956 comparisons and 23181 similarities\n", " 5m 38s 485‰ - 333878365 comparisons 
and 23294 similarities\n", " 5m 38s 486‰ - 334566774 comparisons and 23325 similarities\n", " 5m 39s 487‰ - 335255183 comparisons and 23354 similarities\n", " 5m 40s 488‰ - 335943592 comparisons and 23414 similarities\n", " 5m 41s 489‰ - 336632001 comparisons and 23452 similarities\n", " 5m 41s 490‰ - 337320410 comparisons and 23459 similarities\n", " 5m 42s 491‰ - 338008819 comparisons and 23480 similarities\n", " 5m 43s 492‰ - 338697228 comparisons and 23612 similarities\n", " 5m 43s 493‰ - 339385637 comparisons and 23626 similarities\n", " 5m 44s 494‰ - 340074046 comparisons and 23721 similarities\n", " 5m 45s 495‰ - 340762455 comparisons and 23760 similarities\n", " 5m 45s 496‰ - 341450864 comparisons and 23777 similarities\n", " 5m 46s 497‰ - 342139273 comparisons and 23787 similarities\n", " 5m 47s 498‰ - 342827682 comparisons and 24007 similarities\n", " 5m 47s 499‰ - 343516091 comparisons and 24145 similarities\n", " 5m 48s 500‰ - 344204500 comparisons and 24341 similarities\n", " 5m 49s 501‰ - 344892909 comparisons and 24713 similarities\n", " 5m 49s 502‰ - 345581318 comparisons and 24766 similarities\n", " 5m 50s 503‰ - 346269727 comparisons and 24795 similarities\n", " 5m 51s 504‰ - 346958136 comparisons and 25033 similarities\n", " 5m 51s 505‰ - 347646545 comparisons and 25119 similarities\n", " 5m 52s 506‰ - 348334954 comparisons and 25238 similarities\n", " 5m 53s 507‰ - 349023363 comparisons and 25365 similarities\n", " 5m 53s 508‰ - 349711772 comparisons and 25421 similarities\n", " 5m 54s 509‰ - 350400181 comparisons and 25491 similarities\n", " 5m 54s 510‰ - 351088590 comparisons and 25661 similarities\n", " 5m 55s 511‰ - 351776999 comparisons and 25805 similarities\n", " 5m 56s 512‰ - 352465408 comparisons and 25908 similarities\n", " 5m 56s 513‰ - 353153817 comparisons and 25916 similarities\n", " 5m 57s 514‰ - 353842226 comparisons and 25969 similarities\n", " 5m 58s 515‰ - 354530635 comparisons and 26161 similarities\n", " 5m 58s 516‰ - 
355219044 comparisons and 26168 similarities\n", " 5m 59s 517‰ - 355907453 comparisons and 26269 similarities\n", " 6m 00s 518‰ - 356595862 comparisons and 26307 similarities\n", " 6m 00s 519‰ - 357284271 comparisons and 26349 similarities\n", " 6m 01s 520‰ - 357972680 comparisons and 26472 similarities\n", " 6m 02s 521‰ - 358661089 comparisons and 26577 similarities\n", " 6m 02s 522‰ - 359349498 comparisons and 26657 similarities\n", " 6m 03s 523‰ - 360037907 comparisons and 26752 similarities\n", " 6m 04s 524‰ - 360726316 comparisons and 27008 similarities\n", " 6m 04s 525‰ - 361414725 comparisons and 27226 similarities\n", " 6m 05s 526‰ - 362103134 comparisons and 27637 similarities\n", " 6m 05s 527‰ - 362791543 comparisons and 27936 similarities\n", " 6m 06s 528‰ - 363479952 comparisons and 28089 similarities\n", " 6m 07s 529‰ - 364168361 comparisons and 28219 similarities\n", " 6m 07s 530‰ - 364856770 comparisons and 28233 similarities\n", " 6m 08s 531‰ - 365545179 comparisons and 28308 similarities\n", " 6m 09s 532‰ - 366233588 comparisons and 28353 similarities\n", " 6m 09s 533‰ - 366921997 comparisons and 28380 similarities\n", " 6m 10s 534‰ - 367610406 comparisons and 28387 similarities\n", " 6m 11s 535‰ - 368298815 comparisons and 28426 similarities\n", " 6m 11s 536‰ - 368987224 comparisons and 28506 similarities\n", " 6m 12s 537‰ - 369675633 comparisons and 28515 similarities\n", " 6m 13s 538‰ - 370364042 comparisons and 28634 similarities\n", " 6m 13s 539‰ - 371052451 comparisons and 28659 similarities\n", " 6m 14s 540‰ - 371740860 comparisons and 28761 similarities\n", " 6m 15s 541‰ - 372429269 comparisons and 28843 similarities\n", " 6m 15s 542‰ - 373117678 comparisons and 28873 similarities\n", " 6m 16s 543‰ - 373806087 comparisons and 28950 similarities\n", " 6m 17s 544‰ - 374494496 comparisons and 29079 similarities\n", " 6m 17s 545‰ - 375182905 comparisons and 29190 similarities\n", " 6m 18s 546‰ - 375871314 comparisons and 29265 similarities\n", 
" 6m 19s 547‰ - 376559723 comparisons and 29343 similarities\n", " 6m 19s 548‰ - 377248132 comparisons and 29379 similarities\n", " 6m 20s 549‰ - 377936541 comparisons and 29546 similarities\n", " 6m 21s 550‰ - 378624950 comparisons and 29623 similarities\n", " 6m 21s 551‰ - 379313359 comparisons and 29636 similarities\n", " 6m 22s 552‰ - 380001768 comparisons and 29655 similarities\n", " 6m 23s 553‰ - 380690177 comparisons and 29678 similarities\n", " 6m 23s 554‰ - 381378586 comparisons and 29687 similarities\n", " 6m 24s 555‰ - 382066995 comparisons and 29694 similarities\n", " 6m 25s 556‰ - 382755404 comparisons and 29700 similarities\n", " 6m 25s 557‰ - 383443813 comparisons and 29767 similarities\n", " 6m 26s 558‰ - 384132222 comparisons and 29777 similarities\n", " 6m 27s 559‰ - 384820631 comparisons and 29875 similarities\n", " 6m 28s 560‰ - 385509040 comparisons and 30103 similarities\n", " 6m 28s 561‰ - 386197449 comparisons and 30410 similarities\n", " 6m 29s 562‰ - 386885858 comparisons and 30514 similarities\n", " 6m 29s 563‰ - 387574267 comparisons and 30780 similarities\n", " 6m 30s 564‰ - 388262676 comparisons and 31054 similarities\n", " 6m 31s 565‰ - 388951085 comparisons and 31106 similarities\n", " 6m 32s 566‰ - 389639494 comparisons and 31117 similarities\n", " 6m 32s 567‰ - 390327903 comparisons and 31124 similarities\n", " 6m 33s 568‰ - 391016312 comparisons and 31205 similarities\n", " 6m 34s 569‰ - 391704721 comparisons and 31392 similarities\n", " 6m 34s 570‰ - 392393130 comparisons and 31502 similarities\n", " 6m 35s 571‰ - 393081539 comparisons and 31534 similarities\n", " 6m 36s 572‰ - 393769948 comparisons and 31599 similarities\n", " 6m 36s 573‰ - 394458357 comparisons and 31665 similarities\n", " 6m 37s 574‰ - 395146766 comparisons and 31703 similarities\n", " 6m 38s 575‰ - 395835175 comparisons and 31797 similarities\n", " 6m 38s 576‰ - 396523584 comparisons and 32058 similarities\n", " 6m 39s 577‰ - 397211993 comparisons and 32225 
similarities\n", " 6m 40s 578‰ - 397900402 comparisons and 32425 similarities\n", " 6m 40s 579‰ - 398588811 comparisons and 32459 similarities\n", " 6m 41s 580‰ - 399277220 comparisons and 32538 similarities\n", " 6m 42s 581‰ - 399965629 comparisons and 32571 similarities\n", " 6m 42s 582‰ - 400654038 comparisons and 32666 similarities\n", " 6m 43s 583‰ - 401342447 comparisons and 32884 similarities\n", " 6m 44s 584‰ - 402030856 comparisons and 33176 similarities\n", " 6m 44s 585‰ - 402719265 comparisons and 33219 similarities\n", " 6m 45s 586‰ - 403407674 comparisons and 33269 similarities\n", " 6m 46s 587‰ - 404096083 comparisons and 33385 similarities\n", " 6m 46s 588‰ - 404784492 comparisons and 33456 similarities\n", " 6m 47s 589‰ - 405472901 comparisons and 33480 similarities\n", " 6m 48s 590‰ - 406161310 comparisons and 33586 similarities\n", " 6m 49s 591‰ - 406849719 comparisons and 33621 similarities\n", " 6m 49s 592‰ - 407538128 comparisons and 33646 similarities\n", " 6m 50s 593‰ - 408226537 comparisons and 33664 similarities\n", " 6m 51s 594‰ - 408914946 comparisons and 33735 similarities\n", " 6m 51s 595‰ - 409603355 comparisons and 33818 similarities\n", " 6m 52s 596‰ - 410291764 comparisons and 33825 similarities\n", " 6m 53s 597‰ - 410980173 comparisons and 33848 similarities\n", " 6m 53s 598‰ - 411668582 comparisons and 33876 similarities\n", " 6m 54s 599‰ - 412356991 comparisons and 33917 similarities\n", " 6m 55s 600‰ - 413045400 comparisons and 34012 similarities\n", " 6m 55s 601‰ - 413733809 comparisons and 34044 similarities\n", " 6m 56s 602‰ - 414422218 comparisons and 34095 similarities\n", " 6m 57s 603‰ - 415110627 comparisons and 34130 similarities\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 6m 57s 604‰ - 415799036 comparisons and 34182 similarities\n", " 6m 58s 605‰ - 416487445 comparisons and 34207 similarities\n", " 6m 59s 606‰ - 417175854 comparisons and 34243 similarities\n", " 6m 59s 607‰ - 417864263 comparisons 
and 34256 similarities\n", " 7m 00s 608‰ - 418552672 comparisons and 34296 similarities\n", " 7m 01s 609‰ - 419241081 comparisons and 34349 similarities\n", " 7m 01s 610‰ - 419929490 comparisons and 34400 similarities\n", " 7m 02s 611‰ - 420617899 comparisons and 34505 similarities\n", " 7m 03s 612‰ - 421306308 comparisons and 34619 similarities\n", " 7m 03s 613‰ - 421994717 comparisons and 34746 similarities\n", " 7m 04s 614‰ - 422683126 comparisons and 34881 similarities\n", " 7m 04s 615‰ - 423371535 comparisons and 35046 similarities\n", " 7m 05s 616‰ - 424059944 comparisons and 35142 similarities\n", " 7m 06s 617‰ - 424748353 comparisons and 35236 similarities\n", " 7m 06s 618‰ - 425436762 comparisons and 35279 similarities\n", " 7m 07s 619‰ - 426125171 comparisons and 35350 similarities\n", " 7m 08s 620‰ - 426813580 comparisons and 35400 similarities\n", " 7m 08s 621‰ - 427501989 comparisons and 35435 similarities\n", " 7m 09s 622‰ - 428190398 comparisons and 35450 similarities\n", " 7m 10s 623‰ - 428878807 comparisons and 35501 similarities\n", " 7m 10s 624‰ - 429567216 comparisons and 35521 similarities\n", " 7m 11s 625‰ - 430255625 comparisons and 35602 similarities\n", " 7m 12s 626‰ - 430944034 comparisons and 35620 similarities\n", " 7m 13s 627‰ - 431632443 comparisons and 35686 similarities\n", " 7m 13s 628‰ - 432320852 comparisons and 35775 similarities\n", " 7m 14s 629‰ - 433009261 comparisons and 35811 similarities\n", " 7m 15s 630‰ - 433697670 comparisons and 35823 similarities\n", " 7m 15s 631‰ - 434386079 comparisons and 35828 similarities\n", " 7m 16s 632‰ - 435074488 comparisons and 35867 similarities\n", " 7m 17s 633‰ - 435762897 comparisons and 35877 similarities\n", " 7m 17s 634‰ - 436451306 comparisons and 35882 similarities\n", " 7m 18s 635‰ - 437139715 comparisons and 35898 similarities\n", " 7m 19s 636‰ - 437828124 comparisons and 35949 similarities\n", " 7m 20s 637‰ - 438516533 comparisons and 35949 similarities\n", " 7m 20s 638‰ - 
439204942 comparisons and 35964 similarities\n", " 7m 21s 639‰ - 439893351 comparisons and 35964 similarities\n", " 7m 22s 640‰ - 440581760 comparisons and 36000 similarities\n", " 7m 22s 641‰ - 441270169 comparisons and 36017 similarities\n", " 7m 23s 642‰ - 441958578 comparisons and 36040 similarities\n", " 7m 24s 643‰ - 442646987 comparisons and 36052 similarities\n", " 7m 24s 644‰ - 443335396 comparisons and 36086 similarities\n", " 7m 25s 645‰ - 444023805 comparisons and 36129 similarities\n", " 7m 26s 646‰ - 444712214 comparisons and 36176 similarities\n", " 7m 26s 647‰ - 445400623 comparisons and 36247 similarities\n", " 7m 27s 648‰ - 446089032 comparisons and 36341 similarities\n", " 7m 28s 649‰ - 446777441 comparisons and 36409 similarities\n", " 7m 28s 650‰ - 447465850 comparisons and 36439 similarities\n", " 7m 29s 651‰ - 448154259 comparisons and 36492 similarities\n", " 7m 30s 652‰ - 448842668 comparisons and 36570 similarities\n", " 7m 30s 653‰ - 449531077 comparisons and 36624 similarities\n", " 7m 31s 654‰ - 450219486 comparisons and 36669 similarities\n", " 7m 32s 655‰ - 450907895 comparisons and 36738 similarities\n", " 7m 32s 656‰ - 451596304 comparisons and 36813 similarities\n", " 7m 33s 657‰ - 452284713 comparisons and 36887 similarities\n", " 7m 33s 658‰ - 452973122 comparisons and 36901 similarities\n", " 7m 34s 659‰ - 453661531 comparisons and 37002 similarities\n", " 7m 35s 660‰ - 454349940 comparisons and 37015 similarities\n", " 7m 35s 661‰ - 455038349 comparisons and 37064 similarities\n", " 7m 36s 662‰ - 455726758 comparisons and 37140 similarities\n", " 7m 37s 663‰ - 456415167 comparisons and 37180 similarities\n", " 7m 37s 664‰ - 457103576 comparisons and 37225 similarities\n", " 7m 38s 665‰ - 457791985 comparisons and 37288 similarities\n", " 7m 39s 666‰ - 458480394 comparisons and 37426 similarities\n", " 7m 39s 667‰ - 459168803 comparisons and 37504 similarities\n", " 7m 40s 668‰ - 459857212 comparisons and 37552 similarities\n", 
" 7m 41s 669‰ - 460545621 comparisons and 37564 similarities\n", " 7m 42s 670‰ - 461234030 comparisons and 37569 similarities\n", " 7m 42s 671‰ - 461922439 comparisons and 37579 similarities\n", " 7m 43s 672‰ - 462610848 comparisons and 37651 similarities\n", " 7m 44s 673‰ - 463299257 comparisons and 37658 similarities\n", " 7m 44s 674‰ - 463987666 comparisons and 37704 similarities\n", " 7m 45s 675‰ - 464676075 comparisons and 37719 similarities\n", " 7m 46s 676‰ - 465364484 comparisons and 37776 similarities\n", " 7m 46s 677‰ - 466052893 comparisons and 37952 similarities\n", " 7m 47s 678‰ - 466741302 comparisons and 37981 similarities\n", " 7m 47s 679‰ - 467429711 comparisons and 38236 similarities\n", " 7m 48s 680‰ - 468118120 comparisons and 38369 similarities\n", " 7m 49s 681‰ - 468806529 comparisons and 38678 similarities\n", " 7m 49s 682‰ - 469494938 comparisons and 38973 similarities\n", " 7m 50s 683‰ - 470183347 comparisons and 39241 similarities\n", " 7m 50s 684‰ - 470871756 comparisons and 39483 similarities\n", " 7m 51s 685‰ - 471560165 comparisons and 39639 similarities\n", " 7m 52s 686‰ - 472248574 comparisons and 39972 similarities\n", " 7m 53s 687‰ - 472936983 comparisons and 40127 similarities\n", " 7m 53s 688‰ - 473625392 comparisons and 40278 similarities\n", " 7m 54s 689‰ - 474313801 comparisons and 40382 similarities\n", " 7m 55s 690‰ - 475002210 comparisons and 40651 similarities\n", " 7m 55s 691‰ - 475690619 comparisons and 40877 similarities\n", " 7m 56s 692‰ - 476379028 comparisons and 41099 similarities\n", " 7m 57s 693‰ - 477067437 comparisons and 41181 similarities\n", " 7m 57s 694‰ - 477755846 comparisons and 41213 similarities\n", " 7m 58s 695‰ - 478444255 comparisons and 41232 similarities\n", " 7m 59s 696‰ - 479132664 comparisons and 41287 similarities\n", " 7m 59s 697‰ - 479821073 comparisons and 41326 similarities\n", " 8m 00s 698‰ - 480509482 comparisons and 41364 similarities\n", " 8m 01s 699‰ - 481197891 comparisons and 41433 
similarities\n", " 8m 01s 700‰ - 481886300 comparisons and 41498 similarities\n", " 8m 02s 701‰ - 482574709 comparisons and 41540 similarities\n", " 8m 02s 702‰ - 483263118 comparisons and 41667 similarities\n", " 8m 03s 703‰ - 483951527 comparisons and 41882 similarities\n", " 8m 04s 704‰ - 484639936 comparisons and 41963 similarities\n", " 8m 04s 705‰ - 485328345 comparisons and 42012 similarities\n", " 8m 05s 706‰ - 486016754 comparisons and 42200 similarities\n", " 8m 06s 707‰ - 486705163 comparisons and 42236 similarities\n", " 8m 06s 708‰ - 487393572 comparisons and 42329 similarities\n", " 8m 07s 709‰ - 488081981 comparisons and 42521 similarities\n", " 8m 08s 710‰ - 488770390 comparisons and 42600 similarities\n", " 8m 08s 711‰ - 489458799 comparisons and 42641 similarities\n", " 8m 09s 712‰ - 490147208 comparisons and 42688 similarities\n", " 8m 10s 713‰ - 490835617 comparisons and 42839 similarities\n", " 8m 10s 714‰ - 491524026 comparisons and 42933 similarities\n", " 8m 11s 715‰ - 492212435 comparisons and 42984 similarities\n", " 8m 12s 716‰ - 492900844 comparisons and 43008 similarities\n", " 8m 12s 717‰ - 493589253 comparisons and 43089 similarities\n", " 8m 13s 718‰ - 494277662 comparisons and 43253 similarities\n", " 8m 13s 719‰ - 494966071 comparisons and 43461 similarities\n", " 8m 14s 720‰ - 495654480 comparisons and 43518 similarities\n", " 8m 15s 721‰ - 496342889 comparisons and 43559 similarities\n", " 8m 15s 722‰ - 497031298 comparisons and 43588 similarities\n", " 8m 16s 723‰ - 497719707 comparisons and 43627 similarities\n", " 8m 17s 724‰ - 498408116 comparisons and 43654 similarities\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 8m 17s 725‰ - 499096525 comparisons and 43700 similarities\n", " 8m 18s 726‰ - 499784934 comparisons and 43748 similarities\n", " 8m 19s 727‰ - 500473343 comparisons and 43822 similarities\n", " 8m 19s 728‰ - 501161752 comparisons and 43836 similarities\n", " 8m 20s 729‰ - 501850161 comparisons 
and 43916 similarities\n", " 8m 21s 730‰ - 502538570 comparisons and 43938 similarities\n", " 8m 21s 731‰ - 503226979 comparisons and 43962 similarities\n", " 8m 22s 732‰ - 503915388 comparisons and 44003 similarities\n", " 8m 23s 733‰ - 504603797 comparisons and 44016 similarities\n", " 8m 24s 734‰ - 505292206 comparisons and 44031 similarities\n", " 8m 24s 735‰ - 505980615 comparisons and 44035 similarities\n", " 8m 25s 736‰ - 506669024 comparisons and 44062 similarities\n", " 8m 26s 737‰ - 507357433 comparisons and 44094 similarities\n", " 8m 26s 738‰ - 508045842 comparisons and 44129 similarities\n", " 8m 27s 739‰ - 508734251 comparisons and 44135 similarities\n", " 8m 28s 740‰ - 509422660 comparisons and 44192 similarities\n", " 8m 28s 741‰ - 510111069 comparisons and 44218 similarities\n", " 8m 29s 742‰ - 510799478 comparisons and 44233 similarities\n", " 8m 30s 743‰ - 511487887 comparisons and 44277 similarities\n", " 8m 30s 744‰ - 512176296 comparisons and 44301 similarities\n", " 8m 31s 745‰ - 512864705 comparisons and 44320 similarities\n", " 8m 32s 746‰ - 513553114 comparisons and 44328 similarities\n", " 8m 32s 747‰ - 514241523 comparisons and 44348 similarities\n", " 8m 33s 748‰ - 514929932 comparisons and 44363 similarities\n", " 8m 34s 749‰ - 515618341 comparisons and 44377 similarities\n", " 8m 35s 750‰ - 516306750 comparisons and 44399 similarities\n", " 8m 35s 751‰ - 516995159 comparisons and 44446 similarities\n", " 8m 36s 752‰ - 517683568 comparisons and 44512 similarities\n", " 8m 37s 753‰ - 518371977 comparisons and 44573 similarities\n", " 8m 37s 754‰ - 519060386 comparisons and 44581 similarities\n", " 8m 38s 755‰ - 519748795 comparisons and 44630 similarities\n", " 8m 39s 756‰ - 520437204 comparisons and 44648 similarities\n", " 8m 39s 757‰ - 521125613 comparisons and 44671 similarities\n", " 8m 40s 758‰ - 521814022 comparisons and 44705 similarities\n", " 8m 41s 759‰ - 522502431 comparisons and 44707 similarities\n", " 8m 42s 760‰ - 
523190840 comparisons and 44720 similarities\n", " 8m 42s 761‰ - 523879249 comparisons and 44756 similarities\n", " 8m 43s 762‰ - 524567658 comparisons and 44811 similarities\n", " 8m 44s 763‰ - 525256067 comparisons and 44847 similarities\n", " 8m 45s 764‰ - 525944476 comparisons and 44881 similarities\n", " 8m 46s 765‰ - 526632885 comparisons and 44915 similarities\n", " 8m 46s 766‰ - 527321294 comparisons and 44938 similarities\n", " 8m 47s 767‰ - 528009703 comparisons and 45005 similarities\n", " 8m 48s 768‰ - 528698112 comparisons and 45021 similarities\n", " 8m 48s 769‰ - 529386521 comparisons and 45035 similarities\n", " 8m 49s 770‰ - 530074930 comparisons and 45078 similarities\n", " 8m 50s 771‰ - 530763339 comparisons and 45089 similarities\n", " 8m 50s 772‰ - 531451748 comparisons and 45096 similarities\n", " 8m 51s 773‰ - 532140157 comparisons and 45114 similarities\n", " 8m 52s 774‰ - 532828566 comparisons and 45122 similarities\n", " 8m 53s 775‰ - 533516975 comparisons and 45163 similarities\n", " 8m 53s 776‰ - 534205384 comparisons and 45183 similarities\n", " 8m 54s 777‰ - 534893793 comparisons and 45222 similarities\n", " 8m 55s 778‰ - 535582202 comparisons and 45236 similarities\n", " 8m 55s 779‰ - 536270611 comparisons and 45245 similarities\n", " 8m 56s 780‰ - 536959020 comparisons and 45264 similarities\n", " 8m 57s 781‰ - 537647429 comparisons and 45320 similarities\n", " 8m 57s 782‰ - 538335838 comparisons and 45383 similarities\n", " 8m 58s 783‰ - 539024247 comparisons and 45384 similarities\n", " 8m 59s 784‰ - 539712656 comparisons and 45441 similarities\n", " 8m 59s 785‰ - 540401065 comparisons and 45479 similarities\n", " 9m 00s 786‰ - 541089474 comparisons and 45537 similarities\n", " 9m 01s 787‰ - 541777883 comparisons and 45556 similarities\n", " 9m 01s 788‰ - 542466292 comparisons and 45599 similarities\n", " 9m 02s 789‰ - 543154701 comparisons and 45614 similarities\n", " 9m 03s 790‰ - 543843110 comparisons and 45620 similarities\n", 
" 9m 04s 791‰ - 544531519 comparisons and 45620 similarities\n", " 9m 04s 792‰ - 545219928 comparisons and 45624 similarities\n", " 9m 05s 793‰ - 545908337 comparisons and 45636 similarities\n", " 9m 06s 794‰ - 546596746 comparisons and 45637 similarities\n", " 9m 07s 795‰ - 547285155 comparisons and 45637 similarities\n", " 9m 07s 796‰ - 547973564 comparisons and 45640 similarities\n", " 9m 08s 797‰ - 548661973 comparisons and 45640 similarities\n", " 9m 09s 798‰ - 549350382 comparisons and 45654 similarities\n", " 9m 10s 799‰ - 550038791 comparisons and 45654 similarities\n", " 9m 10s 800‰ - 550727200 comparisons and 45657 similarities\n", " 9m 11s 801‰ - 551415609 comparisons and 45689 similarities\n", " 9m 12s 802‰ - 552104018 comparisons and 45690 similarities\n", " 9m 13s 803‰ - 552792427 comparisons and 45691 similarities\n", " 9m 14s 804‰ - 553480836 comparisons and 45699 similarities\n", " 9m 14s 805‰ - 554169245 comparisons and 45714 similarities\n", " 9m 15s 806‰ - 554857654 comparisons and 45719 similarities\n", " 9m 16s 807‰ - 555546063 comparisons and 45740 similarities\n", " 9m 17s 808‰ - 556234472 comparisons and 45750 similarities\n", " 9m 17s 809‰ - 556922881 comparisons and 45755 similarities\n", " 9m 18s 810‰ - 557611290 comparisons and 45772 similarities\n", " 9m 19s 811‰ - 558299699 comparisons and 45787 similarities\n", " 9m 19s 812‰ - 558988108 comparisons and 45806 similarities\n", " 9m 20s 813‰ - 559676517 comparisons and 45845 similarities\n", " 9m 21s 814‰ - 560364926 comparisons and 45855 similarities\n", " 9m 22s 815‰ - 561053335 comparisons and 45891 similarities\n", " 9m 23s 816‰ - 561741744 comparisons and 45933 similarities\n", " 9m 24s 817‰ - 562430153 comparisons and 45977 similarities\n", " 9m 25s 818‰ - 563118562 comparisons and 46010 similarities\n", " 9m 25s 819‰ - 563806971 comparisons and 46035 similarities\n", " 9m 26s 820‰ - 564495380 comparisons and 46053 similarities\n", " 9m 27s 821‰ - 565183789 comparisons and 46087 
similarities\n", " 9m 28s 822‰ - 565872198 comparisons and 46097 similarities\n", " 9m 28s 823‰ - 566560607 comparisons and 46110 similarities\n", " 9m 29s 824‰ - 567249016 comparisons and 46131 similarities\n", " 9m 30s 825‰ - 567937425 comparisons and 46147 similarities\n", " 9m 31s 826‰ - 568625834 comparisons and 46163 similarities\n", " 9m 32s 827‰ - 569314243 comparisons and 46171 similarities\n", " 9m 33s 828‰ - 570002652 comparisons and 46185 similarities\n", " 9m 34s 829‰ - 570691061 comparisons and 46200 similarities\n", " 9m 34s 830‰ - 571379470 comparisons and 46218 similarities\n", " 9m 35s 831‰ - 572067879 comparisons and 46220 similarities\n", " 9m 36s 832‰ - 572756288 comparisons and 46228 similarities\n", " 9m 37s 833‰ - 573444697 comparisons and 46252 similarities\n", " 9m 38s 834‰ - 574133106 comparisons and 46269 similarities\n", " 9m 38s 835‰ - 574821515 comparisons and 46279 similarities\n", " 9m 39s 836‰ - 575509924 comparisons and 46282 similarities\n", " 9m 40s 837‰ - 576198333 comparisons and 46288 similarities\n", " 9m 41s 838‰ - 576886742 comparisons and 46302 similarities\n", " 9m 41s 839‰ - 577575151 comparisons and 46321 similarities\n", " 9m 42s 840‰ - 578263560 comparisons and 46386 similarities\n", " 9m 43s 841‰ - 578951969 comparisons and 46451 similarities\n", " 9m 43s 842‰ - 579640378 comparisons and 46506 similarities\n", " 9m 44s 843‰ - 580328787 comparisons and 46580 similarities\n", " 9m 45s 844‰ - 581017196 comparisons and 46626 similarities\n", " 9m 45s 845‰ - 581705605 comparisons and 46679 similarities\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 9m 46s 846‰ - 582394014 comparisons and 46720 similarities\n", " 9m 47s 847‰ - 583082423 comparisons and 46735 similarities\n", " 9m 47s 848‰ - 583770832 comparisons and 46776 similarities\n", " 9m 48s 849‰ - 584459241 comparisons and 46842 similarities\n", " 9m 49s 850‰ - 585147650 comparisons and 46869 similarities\n", " 9m 50s 851‰ - 585836059 comparisons 
and 46907 similarities\n", " 9m 51s 852‰ - 586524468 comparisons and 46922 similarities\n", " 9m 51s 853‰ - 587212877 comparisons and 46942 similarities\n", " 9m 52s 854‰ - 587901286 comparisons and 46985 similarities\n", " 9m 53s 855‰ - 588589695 comparisons and 46993 similarities\n", " 9m 54s 856‰ - 589278104 comparisons and 47010 similarities\n", " 9m 55s 857‰ - 589966513 comparisons and 47032 similarities\n", " 9m 55s 858‰ - 590654922 comparisons and 47058 similarities\n", " 9m 56s 859‰ - 591343331 comparisons and 47080 similarities\n", " 9m 57s 860‰ - 592031740 comparisons and 47114 similarities\n", " 9m 58s 861‰ - 592720149 comparisons and 47132 similarities\n", " 9m 59s 862‰ - 593408558 comparisons and 47168 similarities\n", "10m 00s 863‰ - 594096967 comparisons and 47194 similarities\n", "10m 01s 864‰ - 594785376 comparisons and 47239 similarities\n", "10m 01s 865‰ - 595473785 comparisons and 47268 similarities\n", "10m 02s 866‰ - 596162194 comparisons and 47287 similarities\n", "10m 03s 867‰ - 596850603 comparisons and 47308 similarities\n", "10m 04s 868‰ - 597539012 comparisons and 47325 similarities\n", "10m 05s 869‰ - 598227421 comparisons and 47334 similarities\n", "10m 06s 870‰ - 598915830 comparisons and 47340 similarities\n", "10m 07s 871‰ - 599604239 comparisons and 47345 similarities\n", "10m 08s 872‰ - 600292648 comparisons and 47360 similarities\n", "10m 09s 873‰ - 600981057 comparisons and 47387 similarities\n", "10m 09s 874‰ - 601669466 comparisons and 47415 similarities\n", "10m 10s 875‰ - 602357875 comparisons and 47444 similarities\n", "10m 11s 876‰ - 603046284 comparisons and 47479 similarities\n", "10m 12s 877‰ - 603734693 comparisons and 47525 similarities\n", "10m 13s 878‰ - 604423102 comparisons and 47540 similarities\n", "10m 14s 879‰ - 605111511 comparisons and 47586 similarities\n", "10m 15s 880‰ - 605799920 comparisons and 47625 similarities\n", "10m 15s 881‰ - 606488329 comparisons and 47658 similarities\n", "10m 16s 882‰ - 
607176738 comparisons and 47689 similarities\n", "10m 17s 883‰ - 607865147 comparisons and 47709 similarities\n", "10m 18s 884‰ - 608553556 comparisons and 47745 similarities\n", "10m 19s 885‰ - 609241965 comparisons and 47764 similarities\n", "10m 19s 886‰ - 609930374 comparisons and 47803 similarities\n", "10m 20s 887‰ - 610618783 comparisons and 47826 similarities\n", "10m 21s 888‰ - 611307192 comparisons and 47840 similarities\n", "10m 22s 889‰ - 611995601 comparisons and 47863 similarities\n", "10m 23s 890‰ - 612684010 comparisons and 47890 similarities\n", "10m 24s 891‰ - 613372419 comparisons and 47906 similarities\n", "10m 24s 892‰ - 614060828 comparisons and 47916 similarities\n", "10m 25s 893‰ - 614749237 comparisons and 47925 similarities\n", "10m 26s 894‰ - 615437646 comparisons and 47936 similarities\n", "10m 27s 895‰ - 616126055 comparisons and 48027 similarities\n", "10m 28s 896‰ - 616814464 comparisons and 48087 similarities\n", "10m 29s 897‰ - 617502873 comparisons and 48114 similarities\n", "10m 29s 898‰ - 618191282 comparisons and 48138 similarities\n", "10m 30s 899‰ - 618879691 comparisons and 48209 similarities\n", "10m 31s 900‰ - 619568100 comparisons and 48211 similarities\n", "10m 32s 901‰ - 620256509 comparisons and 48230 similarities\n", "10m 33s 902‰ - 620944918 comparisons and 48239 similarities\n", "10m 33s 903‰ - 621633327 comparisons and 48245 similarities\n", "10m 34s 904‰ - 622321736 comparisons and 48286 similarities\n", "10m 35s 905‰ - 623010145 comparisons and 48356 similarities\n", "10m 36s 906‰ - 623698554 comparisons and 48390 similarities\n", "10m 37s 907‰ - 624386963 comparisons and 48403 similarities\n", "10m 37s 908‰ - 625075372 comparisons and 48428 similarities\n", "10m 38s 909‰ - 625763781 comparisons and 48438 similarities\n", "10m 39s 910‰ - 626452190 comparisons and 48469 similarities\n", "10m 40s 911‰ - 627140599 comparisons and 48501 similarities\n", "10m 40s 912‰ - 627829008 comparisons and 48547 similarities\n", 
"10m 41s 913‰ - 628517417 comparisons and 48565 similarities\n", "10m 42s 914‰ - 629205826 comparisons and 48571 similarities\n", "10m 43s 915‰ - 629894235 comparisons and 48606 similarities\n", "10m 44s 916‰ - 630582644 comparisons and 48631 similarities\n", "10m 44s 917‰ - 631271053 comparisons and 48665 similarities\n", "10m 45s 918‰ - 631959462 comparisons and 48717 similarities\n", "10m 46s 919‰ - 632647871 comparisons and 48736 similarities\n", "10m 47s 920‰ - 633336280 comparisons and 48759 similarities\n", "10m 48s 921‰ - 634024689 comparisons and 48764 similarities\n", "10m 49s 922‰ - 634713098 comparisons and 48822 similarities\n", "10m 49s 923‰ - 635401507 comparisons and 48822 similarities\n", "10m 50s 924‰ - 636089916 comparisons and 48849 similarities\n", "10m 51s 925‰ - 636778325 comparisons and 48860 similarities\n", "10m 52s 926‰ - 637466734 comparisons and 48863 similarities\n", "10m 52s 927‰ - 638155143 comparisons and 48870 similarities\n", "10m 53s 928‰ - 638843552 comparisons and 48879 similarities\n", "10m 54s 929‰ - 639531961 comparisons and 48886 similarities\n", "10m 55s 930‰ - 640220370 comparisons and 48908 similarities\n", "10m 56s 931‰ - 640908779 comparisons and 48950 similarities\n", "10m 56s 932‰ - 641597188 comparisons and 48991 similarities\n", "10m 57s 933‰ - 642285597 comparisons and 49031 similarities\n", "10m 58s 934‰ - 642974006 comparisons and 49096 similarities\n", "10m 59s 935‰ - 643662415 comparisons and 49139 similarities\n", "11m 00s 936‰ - 644350824 comparisons and 49166 similarities\n", "11m 00s 937‰ - 645039233 comparisons and 49172 similarities\n", "11m 01s 938‰ - 645727642 comparisons and 49193 similarities\n", "11m 02s 939‰ - 646416051 comparisons and 49199 similarities\n", "11m 03s 940‰ - 647104460 comparisons and 49235 similarities\n", "11m 04s 941‰ - 647792869 comparisons and 49237 similarities\n", "11m 04s 942‰ - 648481278 comparisons and 49253 similarities\n", "11m 05s 943‰ - 649169687 comparisons and 49311 
similarities\n", "11m 06s 944‰ - 649858096 comparisons and 49338 similarities\n", "11m 07s 945‰ - 650546505 comparisons and 49366 similarities\n", "11m 08s 946‰ - 651234914 comparisons and 49406 similarities\n", "11m 09s 947‰ - 651923323 comparisons and 49427 similarities\n", "11m 09s 948‰ - 652611732 comparisons and 49461 similarities\n", "11m 10s 949‰ - 653300141 comparisons and 49607 similarities\n", "11m 11s 950‰ - 653988550 comparisons and 49775 similarities\n", "11m 12s 951‰ - 654676959 comparisons and 49820 similarities\n", "11m 13s 952‰ - 655365368 comparisons and 49925 similarities\n", "11m 13s 953‰ - 656053777 comparisons and 49970 similarities\n", "11m 14s 954‰ - 656742186 comparisons and 50002 similarities\n", "11m 15s 955‰ - 657430595 comparisons and 50010 similarities\n", "11m 16s 956‰ - 658119004 comparisons and 50014 similarities\n", "11m 16s 957‰ - 658807413 comparisons and 50014 similarities\n", "11m 17s 958‰ - 659495822 comparisons and 50014 similarities\n", "11m 18s 959‰ - 660184231 comparisons and 50016 similarities\n", "11m 19s 960‰ - 660872640 comparisons and 50019 similarities\n", "11m 20s 961‰ - 661561049 comparisons and 50023 similarities\n", "11m 20s 962‰ - 662249458 comparisons and 50023 similarities\n", "11m 21s 963‰ - 662937867 comparisons and 50026 similarities\n", "11m 22s 964‰ - 663626276 comparisons and 50037 similarities\n", "11m 23s 965‰ - 664314685 comparisons and 50048 similarities\n", "11m 23s 966‰ - 665003094 comparisons and 50049 similarities\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "11m 24s 967‰ - 665691503 comparisons and 50072 similarities\n", "11m 25s 968‰ - 666379912 comparisons and 50105 similarities\n", "11m 26s 969‰ - 667068321 comparisons and 50113 similarities\n", "11m 27s 970‰ - 667756730 comparisons and 50126 similarities\n", "11m 27s 971‰ - 668445139 comparisons and 50135 similarities\n", "11m 28s 972‰ - 669133548 comparisons and 50150 similarities\n", "11m 29s 973‰ - 669821957 comparisons 
and 50161 similarities\n", "11m 30s 974‰ - 670510366 comparisons and 50176 similarities\n", "11m 31s 975‰ - 671198775 comparisons and 50179 similarities\n", "11m 31s 976‰ - 671887184 comparisons and 50183 similarities\n", "11m 32s 977‰ - 672575593 comparisons and 50191 similarities\n", "11m 33s 978‰ - 673264002 comparisons and 50195 similarities\n", "11m 34s 979‰ - 673952411 comparisons and 50199 similarities\n", "11m 34s 980‰ - 674640820 comparisons and 50248 similarities\n", "11m 35s 981‰ - 675329229 comparisons and 50286 similarities\n", "11m 36s 982‰ - 676017638 comparisons and 50307 similarities\n", "11m 37s 983‰ - 676706047 comparisons and 50347 similarities\n", "11m 37s 984‰ - 677394456 comparisons and 50431 similarities\n", "11m 38s 985‰ - 678082865 comparisons and 50452 similarities\n", "11m 39s 986‰ - 678771274 comparisons and 50498 similarities\n", "11m 39s 987‰ - 679459683 comparisons and 50539 similarities\n", "11m 40s 988‰ - 680148092 comparisons and 50587 similarities\n", "11m 41s 989‰ - 680836501 comparisons and 50612 similarities\n", "11m 42s 990‰ - 681524910 comparisons and 50621 similarities\n", "11m 42s 991‰ - 682213319 comparisons and 50633 similarities\n", "11m 43s 992‰ - 682901728 comparisons and 50652 similarities\n", "11m 44s 993‰ - 683590137 comparisons and 51146 similarities\n", "11m 45s 994‰ - 684278546 comparisons and 51414 similarities\n", "11m 45s 995‰ - 684966955 comparisons and 51547 similarities\n", "11m 46s 996‰ - 685655364 comparisons and 51673 similarities\n", "11m 47s 997‰ - 686343773 comparisons and 51707 similarities\n", "11m 47s 998‰ - 687032182 comparisons and 51768 similarities\n", "11m 48s 999‰ - 687720591 comparisons and 51831 similarities\n", "11m 49s 1000‰ - 688409000 comparisons and 51862 similarities\n", "11m 49s 1000% - 688409065 comparisons and 51862 similarities\n", "11m 49s Data written to ~/github/etcbc/dss/_temp/parallels/sim-1.6.zip\n" ] } ], "source": [ "similarity = readResults(PARA_DIR, 
f\"sim-{version}.zip\")\n", "if not similarity:\n", " similarity = computeSim()\n", " writeResults(similarity, PARA_DIR, f\"sim-{version}.zip\")" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "51862" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(similarity)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So, just over 50,000 pairs of similar lines." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Add parallels to the TF dataset\n", "\n", "We can add this information to the DSS dataset as an *edge feature*.\n", "\n", "An edge feature links two nodes and may annotate that link with a value.\n", "\n", "For parallels, we link each line to each of its parallel lines and we annotate that link with the similarity between\n", "the two lines. The similarity is a percentage, and we round it to integer values.\n", "\n", "If `n1` is similar to `n2`, then `n2` is similar to `n1`.\n", "In order to save space, we only add such links once.\n", "\n", "We can then use\n", "[`E.sim.b(node)`](https://annotation.github.io/text-fabric/tf/core/edgefeature.html#tf.core.edgefeature)\n", "to find all nodes that are parallel to node.\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "metaData = {\n", " \"\": {\n", " \"acronym\": \"dss\",\n", " \"description\": \"parallel lines in the DSS (computed)\",\n", " \"createdBy\": \"Dirk Roorda\",\n", " \"createdDate\": \"2022-09-29\",\n", " \"sourceCreatedDate\": \"2015\",\n", " \"sourceCreatedBy\": \"Martin G. Abegg, Jr., James E. Bowley, and Edward M. 
Cook\",\n", " \"convertedBy\": \"Jarod Jacobs, Martijn Naaijer and Dirk Roorda\",\n", " \"source\": \"Martin Abegg's data files, personal communication\",\n", " \"license\": \"Creative Commons Attribution-NonCommercial 4.0 International License\",\n", " \"licenseUrl\": \"http://creativecommons.org/licenses/by-nc/4.0/\",\n", " \"sourceDescription\": \"Dead Sea Scrolls: biblical and non-biblical scrolls\",\n", " },\n", " \"sim\": {\n", " \"valueType\": \"int\",\n", " \"edgeValues\": True,\n", " \"description\": \"similarity between lines, as a percentage of the common material wrt the combined material\",\n", " },\n", "}" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "simData = {}\n", "\n", "for ((f, t), d) in similarity.items():\n", " simData.setdefault(f, {})[t] = d" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.00s Exporting 0 node and 1 edge and 0 config features to ~/github/etcbc/dss/tf/1.6:\n", " | 0.06s T sim to ~/github/etcbc/dss/tf/1.6\n", " 0.06s Exported 0 node features and 1 edge features and 0 config features to ~/github/etcbc/dss/tf/1.6\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "TF.save(\n", " edgeFeatures=dict(sim=simData), metaData=metaData, module=module\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Turn the parallels feature into a module\n", "\n", "Here we show how to turn the new feature `sim` into a module, so that users can easily load it in a Jupyter notebook or in the TF browser." 
] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This is a TF dataset\n", "Create release data for etcbc/dss/parallels/tf\n", "Found 5 versions\n", "zip files end up in ~/Downloads/None/etcbc-release/dss\n", "zipping etcbc/dss 1.4 with 1 features ==> parallels-tf-1.4.zip\n", "zipping etcbc/dss 1.5 with 1 features ==> parallels-tf-1.5.zip\n", "zipping etcbc/dss 1.6 with 1 features ==> parallels-tf-1.6.zip\n", "zipping etcbc/dss 1.7 with 1 features ==> parallels-tf-1.7.zip\n", "zipping etcbc/dss 1.7.1 with 1 features ==> parallels-tf-1.7.1.zip\n" ] } ], "source": [ "#%%bash\n", "!text-fabric-zip etcbc/dss/parallels/tf" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "This is a TF dataset\n", "Create release data for etcbc/dss/tf\n", "Found 3 versions\n", "zip files end up in ~/Downloads/None/etcbc-release/dss\n", "zipping etcbc/dss 1.6 with 70 features ==> tf-1.6.zip\n", "zipping etcbc/dss 1.7 with 70 features ==> tf-1.7.zip\n", "zipping etcbc/dss 1.7.1 with 70 features ==> tf-1.7.1.zip\n" ] } ], "source": [ "!text-fabric-zip etcbc/dss/tf" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I have added this file to a new release of the DSS GitHub repo." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Use the parallels module\n", "\n", "We load the DSS corpus again, but now with the parallels module." 
] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "The requested TF-app is not available offline\n", "\t~/github/annotation/app-dss/code not found\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "This is Text-Fabric 10.0.3\n", "Api reference : https://annotation.github.io/text-fabric/tf/cheatsheet.html\n", "\n", "0 features found and 0 ignored\n", " 0.00s Not all of the warp features otype and oslots are present in\n", "None/dss/\n", " 0.00s Only the Feature and Edge APIs will be enabled\n", " 0.00s Warp feature \"otext\" not found. Working without Text-API\n", "\n" ] }, { "data": { "text/html": [ "Text-Fabric: Text-Fabric API 10.0.3, no app configured
Data: None/dss/
Features:
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Text-Fabric API: names N F E L T S C TF directly usable

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "A = use(\"ETCBC/dss:clone\", checkout=\"clone\", hoist=globals())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Lo and behold: you see the parallels module listed with one feature: `sim`. It is in *italics*, which indicates\n", "it is an edge feature.\n", "\n", "We just do a quick check here and in another notebook we study parallels a bit more, using the feature `sim`.\n", "\n", "We count how many similar pairs there are, and how many 100% similar pairs there are." ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.25s 51862 results\n" ] }, { "data": { "text/plain": [ "1565737" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = \"\"\"\n", "line\n", "-sim> line\n", "\"\"\"\n", "results = A.search(query)\n", "refNode = results[20000][0]\n", "refNode" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 0.14s 3646 results\n" ] } ], "source": [ "query = \"\"\"\n", "line\n", "-sim=100> line\n", "\"\"\"\n", "results = A.search(query)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's show a few of the pairs that are 100 percent similar." ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
nplineline
1 4Q268 f1:15את ארצ׳ו ולדשן בטוב אדמת׳ו ׃ ויבינו בעונ׳ם וידעו כי 1552980 את ארצ׳ו ולדשן בטוב אדמת׳ו ׃ ויבינו בעוונ׳מה וידעו כי 1563775
2 4Q267 f2:4ובקץ חרבן הארץ עמדו מסיגי הגבול ויתעו את ישראל ׃ 1553076 ובקץ חורבן הארץ עמדו מסיגי גבול ויתעו את ישראל 1563610
3 4Q266 f3iii:19אשר בזה ישראל את דברי׳הם ׃ והכוכב הוא דורש התורה 1553116 אשר בזה ישראל את דברי׳הם ׃ והכוכב הוא דורש התורה 1563157
4 4Q266 f9i:3אל יעל׳ה איש בסולם וחבל וכלי ׃ אל יעל איש למזבח בשבת 1553203 בסולם וחבל וכלי ׃ אל יעל איש למזבח בשבת 1563345
5 4Q464b f1:1ε אמר ל׳הם 1553296 ε לאמר ל ε ׃ 1575440
6 PAM43663 f43:1ε אמר ל׳הם 1553296 ε # אמר ל׳י ε 1588433
7 1Q4 f18:1ε אמר ל׳הם 1553296 ε לאמור ε ׃ 1589783
8 4Q264 f1:10חמר קורצ ולעפר תשוקת׳ו ׃ מה ישיב חמר ויוצר יד ולעצת מה יבין ׃ 1553669 קורץ ולעפר תשוקת׳ו ׃ מה ישיב חמר ויוצר יד לעצת מה יבין ׃ 1562924
9 4Q286 f11:3ε קודש׳ו # ε ׃ 1553730 ε ת קודש ε ׃ 1564740
10 4Q401 f31:3ε קודש׳ו # ε ׃ 1553730 ε י קודש ε ׃ 1571118
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "A.table(results, start=1, end=10, withNodes=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There is also a lower-level way to work with edge features.\n", "\n", "We can list all edges going out from a reference node.\n", "What we see is a tuple of pairs: the target node and the similarity between the reference node and that target node." ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((1565738, 100),\n", " (1565739, 100),\n", " (1565740, 100),\n", " (1565741, 100),\n", " (1565742, 100),\n", " (1565744, 67),\n", " (1565745, 62),\n", " (1565781, 60),\n", " (1565782, 60),\n", " (1565783, 60),\n", " (1565791, 67),\n", " (1565792, 67),\n", " (1565793, 67),\n", " (1565794, 60),\n", " (1565796, 60),\n", " (1565808, 60),\n", " (1565809, 71),\n", " (1565811, 71),\n", " (1565813, 71),\n", " (1565815, 71),\n", " (1565819, 67),\n", " (1565847, 67),\n", " (1565848, 67),\n", " (1565854, 75),\n", " (1565855, 75),\n", " (1565856, 67),\n", " (1565857, 67),\n", " (1565858, 67),\n", " (1565859, 67),\n", " (1565887, 71),\n", " (1565900, 71),\n", " (1565901, 71),\n", " (1565902, 71),\n", " (1565903, 71),\n", " (1565905, 71),\n", " (1565906, 71),\n", " (1565907, 71),\n", " (1565974, 100),\n", " (1565976, 67),\n", " (1565980, 67),\n", " (1565982, 67),\n", " (1566001, 67),\n", " (1566018, 71),\n", " (1566032, 71),\n", " (1573353, 67))" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "E.sim.f(refNode)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Likewise, we can observe the nodes that target the reference node:" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((1565730, 100),\n", " (1565731, 100),\n", " (1565732, 100),\n", " (1565733, 100),\n", " (1565734, 100),\n", " (1565735, 100),\n", "
(1565736, 100))" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "E.sim.t(refNode)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Both sets of nodes are similar to the reference node and it is inconvenient to use both `.f()` and `.t()` to get the similar lines.\n", "\n", "But there is another way:" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((1565730, 100),\n", " (1565731, 100),\n", " (1565732, 100),\n", " (1565733, 100),\n", " (1565734, 100),\n", " (1565735, 100),\n", " (1565736, 100),\n", " (1565738, 100),\n", " (1565739, 100),\n", " (1565740, 100),\n", " (1565741, 100),\n", " (1565742, 100),\n", " (1565744, 67),\n", " (1565745, 62),\n", " (1565781, 60),\n", " (1565782, 60),\n", " (1565783, 60),\n", " (1565791, 67),\n", " (1565792, 67),\n", " (1565793, 67),\n", " (1565794, 60),\n", " (1565796, 60),\n", " (1565808, 60),\n", " (1565809, 71),\n", " (1565811, 71),\n", " (1565813, 71),\n", " (1565815, 71),\n", " (1565819, 67),\n", " (1565847, 67),\n", " (1565848, 67),\n", " (1565854, 75),\n", " (1565855, 75),\n", " (1565856, 67),\n", " (1565857, 67),\n", " (1565858, 67),\n", " (1565859, 67),\n", " (1565887, 71),\n", " (1565900, 71),\n", " (1565901, 71),\n", " (1565902, 71),\n", " (1565903, 71),\n", " (1565905, 71),\n", " (1565906, 71),\n", " (1565907, 71),\n", " (1565974, 100),\n", " (1565976, 67),\n", " (1565980, 67),\n", " (1565982, 67),\n", " (1566001, 67),\n", " (1566018, 71),\n", " (1566032, 71),\n", " (1573353, 67))" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "E.sim.b(refNode)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's make sure that `.b()` gives the combination of `.f()` and `.t()`." 
] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "the intersection of f and t is set()\n", "t | f = b ? True\n" ] } ], "source": [ "f = {x[0] for x in E.sim.f(refNode)}\n", "b = {x[0] for x in E.sim.b(refNode)}\n", "t = {x[0] for x in E.sim.t(refNode)}\n", "\n", "# are f and t disjoint ?\n", "\n", "print(f\"the intersection of f and t is {f & t}\")\n", "\n", "# is b the union of f and t ?\n", "\n", "print(f\"t | f = b ? {f | t == b}\")" ] } ], "metadata": { "jupytext": { "encoding": "# -*- coding: utf-8 -*-" }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 }