{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0.00s This is LAF-Fabric 4.8.2\n", "API reference: http://laf-fabric.readthedocs.org/en/latest/texts/API-reference.html\n", "Feature doc: https://shebanq.ancient-data.org/static/docs/featuredoc/texts/welcome.html\n", "\n" ] } ], "source": [ "import sys, os\n", "import collections\n", "import subprocess\n", "import time\n", "\n", "from copy import deepcopy\n", "from lxml import etree\n", "\n", "import laf\n", "from laf.fabric import LafFabric\n", "from etcbc.preprocess import prepare\n", "from etcbc.mql import MQL\n", "fabric = LafFabric()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0.00s LOADING API: please wait ... \n", " 0.00s DETAIL: COMPILING m: etcbc4b: UP TO DATE\n", " 0.00s USING main: etcbc4b DATA COMPILED AT: 2015-11-02T15-08-56\n", " 0.01s DETAIL: load main: G.node_anchor_min\n", " 0.08s DETAIL: load main: G.node_anchor_max\n", " 0.15s DETAIL: load main: G.node_sort\n", " 0.20s DETAIL: load main: G.node_sort_inv\n", " 0.69s DETAIL: load main: G.edges_from\n", " 0.76s DETAIL: load main: G.edges_to\n", " 0.84s DETAIL: load main: F.etcbc4_db_monads [node] \n", " 1.70s DETAIL: load main: F.etcbc4_db_oid [node] \n", " 3.02s DETAIL: load main: F.etcbc4_db_otype [node] \n", " 3.76s DETAIL: load main: F.etcbc4_ft_code [node] \n", " 3.81s DETAIL: load main: F.etcbc4_ft_det [node] \n", " 4.04s DETAIL: load main: F.etcbc4_ft_function [node] \n", " 4.17s DETAIL: load main: F.etcbc4_ft_g_cons [node] \n", " 4.37s DETAIL: load main: F.etcbc4_ft_g_lex [node] \n", " 4.59s DETAIL: load main: F.etcbc4_ft_g_word [node] \n", " 4.82s DETAIL: load main: F.etcbc4_ft_g_word_utf8 [node] \n", " 5.12s DETAIL: load main: F.etcbc4_ft_lex [node] \n", " 5.32s DETAIL: load main: F.etcbc4_ft_prs [node] \n", " 5.51s DETAIL: load main: F.etcbc4_ft_rela [node] \n", " 5.88s DETAIL: load main: F.etcbc4_ft_sp [node] \n", " 6.07s DETAIL: load main: F.etcbc4_ft_txt [node] \n", " 6.12s DETAIL: load main: F.etcbc4_ft_typ [node] \n", " 6.49s DETAIL: load main: F.etcbc4_sft_book [node] \n", " 6.51s DETAIL: load main: F.etcbc4_sft_chapter [node] \n", " 6.53s DETAIL: load main: F.etcbc4_sft_label [node] \n", " 6.57s DETAIL: load main: F.etcbc4_sft_verse [node] \n", " 6.58s DETAIL: load main: F.etcbc4_ft_functional_parent [e] \n", " 6.87s DETAIL: load main: C.etcbc4_ft_functional_parent -> \n", " 7.82s DETAIL: load main: C.etcbc4_ft_functional_parent <- \n", " 8.32s LOGFILE=/Users/dirk/laf/laf-fabric-output/etcbc4b/mql/__log__mql.txt\n", " 8.32s INFO: LOADING PREPARED data: please wait ... \n", " 8.32s prep prep: G.node_sort\n", " 8.39s prep prep: G.node_sort_inv\n", " 8.98s prep prep: L.node_up\n", " 12s prep prep: L.node_down\n", " 19s prep prep: V.verses\n", " 19s prep prep: V.books_la\n", " 19s ETCBC reference: http://laf-fabric.readthedocs.org/en/latest/texts/ETCBC-reference.html\n", " 21s INFO: LOADED PREPARED data\n", " 21s INFO: DATA LOADED FROM SOURCE etcbc4b AND ANNOX lexicon FOR TASK mql AT 2016-09-28T10-35-28\n" ] } ], "source": [ "API = fabric.load('etcbc4b', '--', 'mql', {\n", " \"xmlids\": {\"node\": False, \"edge\": False},\n", " \"features\": ('''\n", " oid otype monads\n", " g_word g_lex g_word_utf8 g_cons lex \n", " typ code function rela det\n", " txt prs sp\n", " book chapter verse label\n", " ''','''\n", " functional_parent\n", " '''),\n", " \"prepare\": prepare,\n", "}, verbose='DETAIL')\n", "exec(fabric.localnames.format(var='fabric'))\n", "Q = MQL(API)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def text_per_verse():\n", " '''\n", " verse_dict is dict, keys are versenames(book_chapter_verse), values are concatenation of lexemes, each separated by _.\n", " letter_count_dict is dict, keys are letters, values are counts in whole Hebrew Bible.\n", " '''\n", " verse_dict = {}\n", " letter_count_dict = collections.defaultdict(int)\n", " for node in NN(): \n", " if F.otype.v(node) == 'verse':\n", " text = ''\n", " info = F.book.v(L.u('book', node)) + '_' + F.chapter.v(L.u('chapter', node)) + '_' + F.verse.v(node)\n", " words = L.d('word', node)\n", " for word in words:\n", " lexeme = F.lex.v(word)\n", " for lett in lexeme:\n", " if not lett in [' ', '/', '=', '[', '_']:\n", " text += lett\n", " letter_count_dict[lett] += 1\n", " text += ' '\n", " verse_dict[info] = text\n", " return(verse_dict, letter_count_dict)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def make_short_str(lexem, letter_count_dict):\n", " '''\n", " short_word is string \n", " '''\n", " word_lett_dict = {}\n", " if len(lexem) <= 2:\n", " return(lexem)\n", " else:\n", " for lett in lexem:\n", " word_lett_dict[lett] = letter_count_dict[lett]\n", " lowest = min(word_lett_dict, key=word_lett_dict.get) \n", " word_lett_dict.pop(lowest, None)\n", " lowest2 = min(word_lett_dict, key=word_lett_dict.get)\n", "\n", " low_list = [lowest, lowest2]\n", " short_word = ''\n", " for lett in lexem:\n", " if len(short_word) < 2:\n", " if lett in low_list:\n", " short_word += lett\n", "\n", " return(short_word)\n", " " ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def make_num_dict(letter_count_dict):\n", " '''\n", " number_dict is dict, key is one or two letter string (there are (23**23 + 23) keys), value is numerical code of that string\n", " '''\n", " number_dict = {}\n", " basic_num = 100\n", " for letter in letter_count_dict.keys():\n", " feat = letter\n", " number_dict[feat] = basic_num\n", " basic_num += 1\n", " for letter2 in letter_count_dict.keys():\n", " \n", " feat = letter + letter2\n", " number_dict[feat] = basic_num\n", " basic_num += 1\n", " \n", " return(number_dict)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def make_skip_grams(number_dict):\n", " '''\n", " five_sam is list of reduced words.\n", " sam_order is int, it is place of last word of skipgram in MT.\n", " skip_grams_in_book is dict, key is place, value is list of skip-grams associated with it.\n", " skips_and_info is dict, key is place (int), value is string: book_chapter_verse\n", " sam_dict is dict, first key is numerical code of first word in skip-gram, second key is tuple of next words in skip-gram, \n", " value is list of places of skip_gram in Hebrew Bible, place is place of last word in skip-gram\n", " \n", " '''\n", " sam_dict = collections.defaultdict(lambda: collections.defaultdict(list))\n", " skip_grams_in_book = collections.defaultdict(list)\n", " skips_and_info = {}\n", " order_list = []\n", " five_sam = []\n", " sam_order = 0\n", "\n", " for node in NN():\n", " if F.otype.v(node) == 'word':\n", " sam_order += 1\n", " info = F.book.v(L.u('book', node)), F.chapter.v(L.u('chapter', node)), F.verse.v(L.u('verse', node))\n", " lexeme = F.lex.v(node)\n", " redu_lex = ''\n", " for lett in lexeme:\n", " if not lett in [' ', '/', '=', '[', '_']:\n", " redu_lex += lett\n", " reduced = make_short_str(redu_lex, B)\n", " if len(five_sam) < 5: \n", " five_sam.append(reduced)\n", " if len(five_sam) == 5:\n", " numb = [number_dict[word] for word in five_sam]\n", " sam_dict[numb[0]][(str(numb[1]) + str(numb[2]) + str(numb[3]))].append(sam_order)\n", " sam_dict[numb[0]][(str(numb[1]) + str(numb[2]) + str(numb[4]))].append(sam_order)\n", " sam_dict[numb[0]][(str(numb[1]) + str(numb[3]) + str(numb[4]))].append(sam_order)\n", " sam_dict[numb[0]][(str(numb[2]) + str(numb[3]) + str(numb[4]))].append(sam_order)\n", " skip_grams_in_book[sam_order].append((numb[0], str(numb[1]) + str(numb[2]) + str(numb[3])))\n", " skip_grams_in_book[sam_order].append((numb[0], str(numb[1]) + str(numb[2]) + str(numb[4])))\n", " skip_grams_in_book[sam_order].append((numb[0], str(numb[1]) + str(numb[3]) + str(numb[4])))\n", " skip_grams_in_book[sam_order].append((numb[0], str(numb[2]) + str(numb[3]) + str(numb[4])))\n", " skips_and_info[sam_order] = info\n", " del(five_sam[0])\n", " return(skips_and_info, sam_dict)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def make_grambi_dict(sam_dict):\n", " '''\n", " grambi_dict is dict, key is place(int), value is list of other places sharing skip-gram with place \n", " grambi_list is list of all places (int)\n", " '''\n", " grambi_dict = collections.defaultdict(list)\n", " grambi_set = set()\n", " for key in sam_dict.keys():\n", " for key2 in sam_dict[key].keys():\n", " for place in sam_dict[key][key2]:\n", " grambi_set.add(place)\n", " for other_item in sam_dict[key][key2]:\n", " if not other_item in grambi_dict[place]:\n", " grambi_dict[place].append(other_item)\n", " grambi_list = sorted(list(grambi_set))\n", " return(grambi_dict, grambi_list) " ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def find_clusters(grambi_dict, grambi_list, chap_dict): #=HK, KL, FG\n", " finished_clusters = collections.defaultdict(list)\n", " cluster_dict = collections.defaultdict(list)\n", " start_skip_list = []\n", " for gram in grambi_list:\n", " if len(cluster_dict) == 0:\n", " if len(grambi_dict[gram]) > 1:\n", " for item in grambi_dict[gram]:\n", " if item != gram:\n", " if abs(item - gram) > 50:\n", " if not (chap_dict[gram][0], chap_dict[gram][1]) == (chap_dict[item][0], chap_dict[item][1]):\n", " if not set((chap_dict[gram], chap_dict[item])) in start_skip_list:\n", " cluster_dict[(gram, item)].append(gram)\n", " cluster_dict[(gram, item)].append(item)\n", " else:\n", " copy_dict = deepcopy(cluster_dict)\n", " keys = copy_dict.keys()\n", " for it1, it2 in keys:\n", " if gram in grambi_dict[cluster_dict[(it1, it2)][-1] + 1]:\n", " cluster_dict[(it1, it2)].append(gram)\n", " cluster_dict[(it1, it2)].append(cluster_dict[(it1, it2)][-2] + 1)\n", " else:\n", " if len(cluster_dict[(it1, it2)]) > 39:\n", " finished_clusters[(it1, it2)] = sorted(cluster_dict[(it1, it2)])\n", " start_skip_list.append(set((chap_dict[it1], chap_dict[it2])))\n", " cluster_dict.pop((it1, it2))\n", " print(chap_dict[it1], chap_dict[it2])\n", " else:\n", " cluster_dict.pop((it1, it2))\n", "\n", " return(finished_clusters)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('Genesis', '10', '2') ('Chronica_I', '1', '5')\n", "('Genesis', '10', '7') ('Chronica_I', '1', '9')\n", "('Genesis', '10', '13') ('Chronica_I', '1', '11')\n", "('Genesis', '10', '24') ('Chronica_I', '1', '18')\n", "('Genesis', '36', '32') ('Chronica_I', '1', '43')\n", "('Genesis', '36', '40') ('Chronica_I', '1', '51')\n", "('Exodus', '20', '2') ('Deuteronomium', '5', '6')\n", "('Exodus', '20', '4') ('Deuteronomium', '5', '8')\n", "('Exodus', '20', '5') ('Deuteronomium', '5', '9')\n", "('Exodus', '20', '9') ('Deuteronomium', '5', '13')\n", "('Exodus', '25', '4') ('Exodus', '35', '6')\n", "('Exodus', '25', '12') ('Exodus', '37', '3')\n", "('Exodus', '25', '17') ('Exodus', '37', '6')\n", "('Exodus', '25', '19') ('Exodus', '37', '8')\n", "('Exodus', '25', '20') ('Exodus', '37', '9')\n", "('Exodus', '25', '23') ('Exodus', '37', '10')\n", "('Exodus', '25', '26') ('Exodus', '37', '13')\n", "('Exodus', '25', '31') ('Exodus', '37', '17')\n", "('Exodus', '25', '33') ('Exodus', '37', '19')\n", "('Exodus', '26', '1') ('Exodus', '36', '8')\n", "('Exodus', '26', '4') ('Exodus', '36', '11')\n", "('Exodus', '26', '6') ('Exodus', '36', '13')\n", "('Exodus', '26', '14') ('Exodus', '36', '19')\n", "('Exodus', '26', '17') ('Exodus', '36', '22')\n", "('Exodus', '26', '21') ('Exodus', '36', '26')\n", "('Exodus', '26', '26') ('Exodus', '36', '31')\n", "('Exodus', '26', '28') ('Exodus', '36', '33')\n", "('Exodus', '28', '17') ('Exodus', '39', '10')\n", "('Exodus', '28', '25') ('Exodus', '39', '18')\n", "('Exodus', '28', '26') ('Exodus', '39', '19')\n", "('Exodus', '28', '27') ('Exodus', '39', '20')\n", "('Exodus', '30', '3') ('Exodus', '37', '26')\n", "('Exodus', '30', '27') ('Exodus', '31', '8')\n", "('Exodus', '31', '4') ('Exodus', '35', '32')\n", "('Exodus', '35', '16') ('Exodus', '39', '39')\n", "('Exodus', '35', '19') ('Exodus', '39', '41')\n", "('Exodus', '36', '9') ('Exodus', '26', '2')\n", "('Exodus', '36', '12') ('Exodus', '26', '5')\n", "('Exodus', '36', '14') ('Exodus', '26', '7')\n", "('Exodus', '36', '15') ('Exodus', '26', '8')\n", "('Exodus', '36', '20') ('Exodus', '26', '15')\n", "('Exodus', '36', '23') ('Exodus', '26', '18')\n", "('Exodus', '36', '27') ('Exodus', '26', '22')\n", "('Exodus', '36', '34') ('Exodus', '26', '29')\n", "('Exodus', '37', '4') ('Exodus', '25', '13')\n", "('Exodus', '37', '11') ('Exodus', '25', '24')\n", "('Exodus', '37', '18') ('Exodus', '25', '32')\n", "('Exodus', '37', '20') ('Exodus', '25', '34')\n", "('Exodus', '39', '11') ('Exodus', '28', '18')\n", "('Exodus', '39', '21') ('Exodus', '28', '28')\n", "('Leviticus', '3', '3') ('Leviticus', '4', '8')\n", "('Leviticus', '3', '9') ('Leviticus', '4', '8')\n", "('Leviticus', '3', '14') ('Leviticus', '4', '8')\n", "('Leviticus', '4', '9') ('Leviticus', '3', '10')\n", "('Leviticus', '4', '9') ('Leviticus', '3', '15')\n", "('Leviticus', '4', '9') ('Leviticus', '3', '4')\n", "('Leviticus', '4', '9') ('Leviticus', '7', '4')\n", "('Leviticus', '7', '4') ('Leviticus', '3', '4')\n", "('Leviticus', '7', '4') ('Leviticus', '3', '10')\n", "('Leviticus', '7', '4') ('Leviticus', '3', '15')\n", "('Leviticus', '11', '16') ('Deuteronomium', '14', '15')\n", "('Numeri', '21', '33') ('Deuteronomium', '3', '1')\n", "('Numeri', '21', '34') ('Deuteronomium', '3', '2')\n", "('Deuteronomium', '5', '10') ('Exodus', '20', '6')\n", "('Deuteronomium', '5', '14') ('Exodus', '20', '10')\n", "('Deuteronomium', '14', '6') ('Leviticus', '11', '3')\n", "('Deuteronomium', '14', '14') ('Leviticus', '11', '15')\n", "('Josua', '15', '15') ('Judices', '1', '11')\n", "('Judices', '1', '12') ('Josua', '15', '16')\n", "('Samuel_I', '31', '4') ('Chronica_I', '10', '4')\n", "('Samuel_I', '31', '7') ('Chronica_I', '10', '7')\n", "('Samuel_II', '7', '7') ('Chronica_I', '17', '6')\n", "('Samuel_II', '7', '16') ('Chronica_I', '17', '14')\n", "('Samuel_II', '8', '4') ('Chronica_I', '18', '4')\n", "('Samuel_II', '8', '10') ('Chronica_I', '18', '10')\n", "('Samuel_II', '8', '14') ('Chronica_I', '18', '13')\n", "('Samuel_II', '8', '15') ('Chronica_I', '18', '14')\n", "('Samuel_II', '10', '9') ('Chronica_I', '19', '10')\n", "('Samuel_II', '12', '30') ('Chronica_I', '20', '2')\n", "('Samuel_II', '22', '8') ('Psalmi', '18', '8')\n", "('Samuel_II', '22', '21') ('Psalmi', '18', '21')\n", "('Samuel_II', '22', '31') ('Psalmi', '18', '31')\n", "('Samuel_II', '22', '33') ('Psalmi', '18', '33')\n", "('Samuel_II', '23', '11') ('Chronica_I', '11', '13')\n", "('Samuel_II', '23', '16') ('Chronica_I', '11', '18')\n", "('Samuel_II', '23', '20') ('Chronica_I', '11', '22')\n", "('Samuel_II', '23', '21') ('Chronica_I', '11', '23')\n", "('Reges_I', '7', '25') ('Chronica_II', '4', '4')\n", "('Reges_I', '7', '41') ('Chronica_II', '4', '12')\n", "('Reges_I', '8', '5') ('Chronica_II', '5', '6')\n", "('Reges_I', '8', '7') ('Chronica_II', '5', '8')\n", "('Reges_I', '8', '8') ('Chronica_II', '5', '9')\n", "('Reges_I', '8', '14') ('Chronica_II', '6', '3')\n", "('Reges_I', '8', '16') ('Chronica_II', '6', '6')\n", "('Reges_I', '8', '19') ('Chronica_II', '6', '9')\n", "('Reges_I', '8', '23') ('Chronica_II', '6', '14')\n", "('Reges_I', '8', '27') ('Chronica_II', '6', '18')\n", "('Reges_I', '8', '29') ('Chronica_II', '6', '20')\n", "('Reges_I', '8', '31') ('Chronica_II', '6', '22')\n", "('Reges_I', '8', '35') ('Chronica_II', '6', '26')\n", "('Reges_I', '8', '36') ('Chronica_II', '6', '27')\n", "('Reges_I', '8', '40') ('Chronica_II', '6', '31')\n", "('Reges_I', '8', '43') ('Chronica_II', '6', '33')\n", "('Reges_I', '8', '45') ('Chronica_II', '6', '35')\n", "('Reges_I', '8', '47') ('Chronica_II', '6', '37')\n", "('Reges_I', '9', '3') ('Chronica_II', '7', '16')\n", "('Reges_I', '9', '8') ('Chronica_II', '7', '21')\n", "('Reges_I', '9', '19') ('Chronica_II', '8', '6')\n", "('Reges_I', '10', '13') ('Chronica_II', '9', '12')\n", "('Reges_I', '10', '19') ('Chronica_II', '9', '18')\n", "('Reges_I', '10', '22') ('Chronica_II', '9', '21')\n", "('Reges_I', '10', '26') ('Chronica_II', '1', '14')\n", "('Reges_I', '11', '42') ('Chronica_II', '9', '30')\n", "('Reges_I', '12', '4') ('Chronica_II', '10', '4')\n", "('Reges_I', '12', '5') ('Chronica_II', '10', '5')\n", "('Reges_I', '12', '7') ('Chronica_II', '10', '7')\n", "('Reges_I', '12', '9') ('Chronica_II', '10', '9')\n", "('Reges_I', '12', '10') ('Chronica_II', '10', '10')\n", "('Reges_I', '12', '12') ('Chronica_II', '10', '12')\n", "('Reges_I', '12', '17') ('Chronica_II', '10', '17')\n", "('Reges_I', '12', '18') ('Chronica_II', '10', '18')\n", "('Reges_I', '14', '21') ('Chronica_II', '12', '13')\n", "('Reges_I', '14', '26') ('Chronica_II', '12', '9')\n", "('Reges_I', '14', '29') ('Reges_I', '15', '7')\n", "('Reges_I', '15', '17') ('Chronica_II', '16', '1')\n", "('Reges_I', '15', '19') ('Chronica_II', '16', '3')\n", "('Reges_I', '22', '5') ('Chronica_II', '18', '4')\n", "('Reges_I', '22', '7') ('Chronica_II', '18', '6')\n", "('Reges_I', '22', '8') ('Chronica_II', '18', '7')\n", "('Reges_I', '22', '10') ('Chronica_II', '18', '9')\n", "('Reges_I', '22', '15') ('Chronica_II', '18', '14')\n", "('Reges_I', '22', '16') ('Chronica_II', '18', '15')\n", "('Reges_I', '22', '17') ('Chronica_II', '18', '16')\n", "('Reges_I', '22', '21') ('Chronica_II', '18', '20')\n", "('Reges_I', '22', '22') ('Chronica_II', '18', '21')\n", "('Reges_I', '22', '23') ('Chronica_II', '18', '22')\n", "('Reges_I', '22', '24') ('Chronica_II', '18', '23')\n", "('Reges_I', '22', '28') ('Chronica_II', '18', '27')\n", "('Reges_I', '22', '30') ('Chronica_II', '18', '29')\n", "('Reges_I', '22', '34') ('Chronica_II', '18', '33')\n", "('Reges_I', '22', '42') ('Chronica_II', '20', '31')\n", "('Reges_II', '8', '17') ('Chronica_II', '21', '5')\n", "('Reges_II', '11', '11') ('Chronica_II', '23', '10')\n", "('Reges_II', '12', '2') ('Chronica_II', '24', '1')\n", "('Reges_II', '13', '12') ('Reges_II', '14', '15')\n", "('Reges_II', '14', '8') ('Chronica_II', '25', '17')\n", "('Reges_II', '14', '11') ('Chronica_II', '25', '21')\n", "('Reges_II', '14', '21') ('Chronica_II', '26', '1')\n", "('Reges_II', '15', '2') ('Chronica_II', '26', '3')\n", "('Reges_II', '15', '33') ('Chronica_II', '27', '1')\n", "('Reges_II', '16', '2') ('Chronica_II', '28', '1')\n", "('Reges_II', '18', '18') ('Jesaia', '36', '3')\n", "('Reges_II', '18', '21') ('Jesaia', '36', '6')\n", "('Reges_II', '18', '23') ('Jesaia', '36', '8')\n", "('Reges_II', '18', '27') ('Jesaia', '36', '12')\n", "('Reges_II', '18', '31') ('Jesaia', '36', '16')\n", "('Reges_II', '18', '36') ('Jesaia', '36', '21')\n", "('Reges_II', '19', '1') ('Jesaia', '37', '1')\n", "('Reges_II', '19', '3') ('Jesaia', '37', '3')\n", "('Reges_II', '19', '4') ('Jesaia', '37', '4')\n", "('Reges_II', '19', '6') ('Jesaia', '37', '6')\n", "('Reges_II', '19', '7') ('Jesaia', '37', '7')\n", "('Reges_II', '19', '9') ('Jesaia', '37', '9')\n", "('Reges_II', '19', '11') ('Jesaia', '37', '11')\n", "('Reges_II', '19', '15') ('Jesaia', '37', '16')\n", "('Reges_II', '19', '18') ('Jesaia', '37', '19')\n", "('Reges_II', '19', '20') ('Jesaia', '37', '21')\n", "('Reges_II', '19', '21') ('Jesaia', '37', '22')\n", "('Reges_II', '19', '23') ('Jesaia', '37', '24')\n", "('Reges_II', '19', '25') ('Jesaia', '37', '26')\n", "('Reges_II', '19', '27') ('Jesaia', '37', '28')\n", "('Reges_II', '19', '29') ('Jesaia', '37', '30')\n", "('Reges_II', '19', '35') ('Jesaia', '37', '36')\n", "('Reges_II', '20', '3') ('Jesaia', '38', '3')\n", "('Reges_II', '20', '6') ('Jesaia', '38', '5')\n", "('Reges_II', '20', '13') ('Jesaia', '39', '2')\n", "('Reges_II', '20', '15') ('Jesaia', '39', '4')\n", "('Reges_II', '20', '17') ('Jesaia', '39', '6')\n", "('Reges_II', '21', '2') ('Chronica_II', '33', '2')\n", "('Reges_II', '21', '7') ('Chronica_II', '33', '7')\n", "('Reges_II', '21', '24') ('Chronica_II', '33', '25')\n", "('Reges_II', '22', '12') ('Chronica_II', '34', '20')\n", "('Reges_II', '22', '20') ('Chronica_II', '34', '28')\n", "('Reges_II', '24', '18') ('Jeremia', '52', '1')\n", "('Reges_II', '25', '1') ('Jeremia', '52', '4')\n", "('Reges_II', '25', '3') ('Jeremia', '52', '6')\n", "('Reges_II', '25', '4') ('Jeremia', '52', '7')\n", "('Reges_II', '25', '19') ('Jeremia', '52', '25')\n", "('Reges_II', '25', '21') ('Jeremia', '52', '27')\n", "('Jesaia', '2', '3') ('Micha', '4', '2')\n", "('Jesaia', '36', '4') ('Reges_II', '18', '19')\n", "('Jesaia', '36', '7') ('Reges_II', '18', '22')\n", "('Jesaia', '36', '9') ('Reges_II', '18', '24')\n", "('Jesaia', '36', '22') ('Reges_II', '18', '37')\n", "('Jesaia', '37', '8') ('Reges_II', '19', '8')\n", "('Jesaia', '37', '10') ('Reges_II', '19', '10')\n", "('Jesaia', '37', '12') ('Reges_II', '19', '12')\n", "('Jesaia', '37', '27') ('Reges_II', '19', '26')\n", "('Jesaia', '37', '29') ('Reges_II', '19', '28')\n", "('Jesaia', '37', '31') ('Reges_II', '19', '30')\n", "('Jesaia', '37', '37') ('Reges_II', '19', '36')\n", "('Jesaia', '38', '6') ('Reges_II', '20', '6')\n", "('Jesaia', '39', '3') ('Reges_II', '20', '14')\n", "('Jesaia', '39', '7') ('Reges_II', '20', '18')\n", "('Jeremia', '10', '12') ('Jeremia', '51', '15')\n", "('Jeremia', '10', '13') ('Jeremia', '51', '16')\n", "('Jeremia', '16', '14') ('Jeremia', '23', '7')\n", "('Jeremia', '30', '10') ('Jeremia', '46', '27')\n", "('Jeremia', '49', '19') ('Jeremia', '50', '44')\n", "('Jeremia', '51', '17') ('Jeremia', '10', '14')\n", "('Jeremia', '52', '13') ('Reges_II', '25', '9')\n", "('Jeremia', '52', '17') ('Reges_II', '25', '13')\n", "('Ezechiel', '3', '17') ('Ezechiel', '33', '7')\n", "('Ezechiel', '36', '4') ('Ezechiel', '6', '3')\n", "('Psalmi', '18', '9') ('Samuel_II', '22', '9')\n", "('Psalmi', '18', '22') ('Samuel_II', '22', '22')\n", "('Psalmi', '18', '34') ('Samuel_II', '22', '34')\n", "('Psalmi', '60', '7') ('Psalmi', '108', '7')\n", "('Psalmi', '60', '9') ('Psalmi', '108', '9')\n", "('Psalmi', '60', '12') ('Psalmi', '108', '12')\n", "('Psalmi', '96', '7') ('Chronica_I', '16', '28')\n", "('Psalmi', '105', '1') ('Chronica_I', '16', '8')\n", "('Psalmi', '105', '7') ('Chronica_I', '16', '14')\n", "('Psalmi', '106', '47') ('Chronica_I', '16', '35')\n", "('Psalmi', '108', '13') ('Psalmi', '60', '13')\n", "('Iob', '1', '7') ('Iob', '2', '2')\n", "('Esra', '1', '1') ('Chronica_II', '36', '22')\n", "('Esra', '2', '9') ('Nehemia', '7', '14')\n", "('Esra', '2', '36') ('Nehemia', '7', '39')\n", "('Esra', '2', '46') ('Nehemia', '7', '49')\n", "('Esra', '2', '51') ('Nehemia', '7', '53')\n", "('Esra', '2', '58') ('Nehemia', '7', '60')\n", "('Esra', '2', '61') ('Nehemia', '7', '63')\n", "('Nehemia', '7', '40') ('Esra', '2', '37')\n", "('Nehemia', '7', '49') ('Esra', '2', '47')\n", "('Nehemia', '7', '54') ('Esra', '2', '52')\n", "('Nehemia', '7', '61') ('Esra', '2', '59')\n", "('Nehemia', '7', '64') ('Esra', '2', '62')\n", "('Chronica_I', '1', '12') ('Genesis', '10', '14')\n", "('Chronica_I', '1', '19') ('Genesis', '10', '25')\n", "('Chronica_I', '1', '44') ('Genesis', '36', '33')\n", "('Chronica_I', '1', '52') ('Genesis', '36', '41')\n", "('Chronica_I', '8', '32') ('Chronica_I', '9', '38')\n", "('Chronica_I', '8', '37') ('Chronica_I', '9', '43')\n", "('Chronica_I', '9', '39') ('Chronica_I', '8', '33')\n", "('Chronica_I', '9', '44') ('Chronica_I', '8', '38')\n", "('Chronica_I', '10', '8') ('Samuel_I', '31', '8')\n", "('Chronica_I', '16', '9') ('Psalmi', '105', '2')\n", "('Chronica_I', '16', '15') ('Psalmi', '105', '8')\n", "('Chronica_I', '16', '25') ('Psalmi', '96', '4')\n", "('Chronica_I', '17', '7') ('Samuel_II', '7', '8')\n", "('Chronica_I', '17', '15') ('Samuel_II', '7', '17')\n", "('Chronica_I', '18', '5') ('Samuel_II', '8', '5')\n", "('Chronica_I', '19', '11') ('Samuel_II', '10', '10')\n", "('Chronica_II', '1', '15') ('Reges_I', '10', '27')\n", "('Chronica_II', '4', '13') ('Reges_I', '7', '42')\n", "('Chronica_II', '5', '7') ('Reges_I', '8', '6')\n", "('Chronica_II', '6', '4') ('Reges_I', '8', '15')\n", "('Chronica_II', '6', '7') ('Reges_I', '8', '17')\n", "('Chronica_II', '6', '10') ('Reges_I', '8', '20')\n", "('Chronica_II', '6', '15') ('Reges_I', '8', '24')\n", "('Chronica_II', '6', '19') ('Reges_I', '8', '28')\n", "('Chronica_II', '6', '36') ('Reges_I', '8', '46')\n", "('Chronica_II', '9', '19') ('Reges_I', '10', '20')\n", "('Chronica_II', '9', '27') ('Reges_I', '10', '27')\n", "('Chronica_II', '9', '31') ('Reges_I', '11', '43')\n", "('Chronica_II', '10', '8') ('Reges_I', '12', '8')\n", "('Chronica_II', '10', '11') ('Reges_I', '12', '11')\n", "('Chronica_II', '12', '10') ('Reges_I', '14', '27')\n", "('Chronica_II', '18', '8') ('Reges_I', '22', '9')\n", "('Chronica_II', '18', '10') ('Reges_I', '22', '11')\n", "('Chronica_II', '18', '17') ('Reges_I', '22', '18')\n", "('Chronica_II', '18', '24') ('Reges_I', '22', '25')\n", "('Chronica_II', '18', '30') ('Reges_I', '22', '31')\n", "('Chronica_II', '23', '11') ('Reges_II', '11', '12')\n", "('Chronica_II', '25', '1') ('Reges_II', '14', '2')\n", "('Chronica_II', '25', '18') ('Reges_II', '14', '9')\n", "('Chronica_II', '25', '22') ('Reges_II', '14', '12')\n", "('Chronica_II', '29', '1') ('Reges_II', '18', '2')\n", "('Chronica_II', '33', '6') ('Reges_II', '21', '6')\n", "('Chronica_II', '36', '23') ('Esra', '1', '2')\n", "--- 63.04774308204651 seconds ---\n" ] } ], "source": [ "start_time = time.time()\n", "A, B = text_per_verse()\n", "D = make_num_dict(B)\n", "FG, GH = make_skip_grams(D)\n", "HK, KL = make_grambi_dict(GH)\n", "LM = find_clusters(HK, KL, FG)\n", "print(\"--- %s seconds ---\" % (time.time() - start_time))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "281" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(LM)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 0 }