{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "4d22101e-2686-42a9-91fd-ab29eb4555be", "metadata": { "scrolled": true, "tags": [] }, "outputs": [], "source": [ "import igraph as ig\n", "from igraph import Graph\n", "from Bio import SeqIO\n", "import logging\n", "from logging import debug, info, warning, error\n", "logger = logging.getLogger()\n", "logger.setLevel(logging.DEBUG)\n", "\n", "\n", "class DBGraph(Graph):\n", " newid = 0 # allow for a fixed id per node\n", " k = 4 # we need to know the k-mer size (note: the length of the sequence per node is k-1)\n", " #igraph = None\n", " name = \"\"\n", " compacted = False\n", " contigs = []\n", " \n", " def __init__(self, name=\"\", k=4):\n", " self.name = name\n", " self.k = k\n", " super().__init__(directed=True)\n", "\n", " \n", " \n", " \n", " \n", " # utility function to quickly retrieve the edge between two igraph nodes\n", " # this function should exist in igraph itself\n", " def edge(self, n1, n2):\n", " gl = self\n", " return gl.es[gl.get_eid(n1.index, n2.index)]\n", " \n", " \"\"\"\n", " \n", " utility functions: adds the node sequences to the graph\n", " This one initialized a fresh node if the sequence is not already there.\n", " \n", " \"\"\"\n", " \n", " def find_or_fresh_node(self, s):\n", " new_node = None\n", " g = self\n", " try:\n", " new_node = g.vs.find(sequence=s) \n", " except: \n", " if not new_node:\n", " new_node = g.add_vertex(name=s)\n", " new_node['id'] = self.newid # we need to keep a fixed id to reference nodes if other nodes are deleted\n", " new_node['sequence'] = s\n", " new_node['label'] = str(new_node.index) +':'+ s\n", " new_node['visited'] = False\n", " new_node['compacted'] = False\n", " new_node['coverage'] = 1\n", " new_node['sum'] = 0\n", " new_node['dist'] = 0\n", " \n", " assert new_node, 'Failed to create new node!'\n", " self.newid += 1 \n", " return new_node\n", " \n", " \n", " def add(self, s1, s2):\n", " if self.compacted:\n", " raise Exception(\"New nodes cannot be added to a compacted graph.\")\n", " g = self\n", " n1 = self.find_or_fresh_node(s1) \n", " n2 = self.find_or_fresh_node(s2) \n", " \n", " i1 = n1.index\n", " i2 = n2.index\n", " debug (\"adding edge\"+str(i1)+\" --> \"+str(i2))\n", " eid = None\n", " try: # the edge could be there already\n", " eid = g.get_eid(i1,i2)\n", " g.es[eid][\"coverage\"] += 1 ## in a true DeBrujn Graph the edges carry the multiplicity!\n", " except: # the edge is new\n", " g.add_edges([(i1,i2)])\n", " eid = g.get_eid(i1,i2)\n", " g.es[eid][\"coverage\"] = 1 \n", " \n", " def add_kmer(self,kmer):\n", " assert len(kmer) == self.k, \"kmer sequence must have length k\" \n", " self.add(kmer[0:self.k-1], kmer[1:self.k])\n", " \n", " def add_from_file(self, file): \n", " for record in SeqIO.parse(file, 'fasta'):\n", " i = 0\n", " while i < (len(record.seq) - self.k + 1):\n", " self.add_kmer(record.seq[i:i+self.k])\n", " i += 1\n", " \n", "\n", " def get_node0(self):\n", " return Node(graph=self, id=0)\n", " \n", " \"\"\"\n", " compact the graph by compressing linear stretches of nodes into a single node\n", " and deleting the redundant nodes\n", " conserves coverage by computing the average coverage for the compacted nodes\n", " and stores it in the node\n", " That leads to a much more compact representation and reduces the effort of \n", " subsequenct filtering and contig generation\n", " Note that no more k-mers can be added after compaction, because the original \n", " k-mers can no longer be found by dictionary lookup\n", " \"\"\"\n", " \n", " \n", " def compact(self):\n", " if self.compacted:\n", " raise Exception(\"Compaction can only be run once per graph\")\n", " self.compacted = True\n", " ## look for simple 0 start nodes first\n", " ## only nodes with outdegree can be compacted \n", " # for ni in nset0: print(ni[\"name\"])\n", " i = 0\n", " # update set of not yet compacted nodes after each iteration\n", " while True: # this needs to be done this way, otherwise the set is not updated correctly \n", " nset0 = self.vs.select(_outdegree_eq = 1, compacted = False)\n", " debug(str (i) + ': len(nset0)' + str(len(nset0)) )\n", " if len(nset0) < 1: \n", " info('no more compactable stretches')\n", " break\n", " n = nset0[0]\n", " i += 1\n", " ### we want to walk to the left-most position, before engaging with the compaction\n", " n['visited'] = True\n", " while (n.indegree() == 1 and n.predecessors()[0].outdegree() == 1 and not n.predecessors()[0]['visited']):\n", " # move left \n", " n = n.predecessors()[0]\n", " n['visited'] = True # avoid infinite loop\n", " debug('seeking left' + n['sequence'] )\n", " for noob in self.vs: noob['visited'] = False # set it back\n", " debug (\"Start compacting: \" + str(n) + str(n.outdegree())) \n", " assert n.outdegree() == 1 and (not n['compacted']), \"We hit an already compacted node!\" # this should never happen because of select statement\n", " first_time = True\n", " # don't take this node again\n", " # we are going to simply delete the neighbors, there is only one dircet neighbor\n", " # but we also need to check if the next node is balanced\n", " n['compacted'] = True # mark this node as compacted, so it won't come up again\n", " n['sum'] = 0 # compacted nodes store their coverage themselves, sum of coverages\n", " n['dist'] = 0 # distance forward to next \"junction\" ahead\n", " \n", " ## control id, keeping the originally assigned ids in the nodes \n", " cid = n[\"id\"]\n", " \n", " ## This is the actual compaction for node n \n", " while True: \n", " n = self.vs.select(id = cid)[0] \n", " ## when deletng nodes, weird things kan happen, so make sure we got the same node as before\n", " assert n, \"node must stay the same under one iteration\" \n", " ## exit condition: we don't have exactly 1 successor, or the next node has \n", " ## multiple incoming nodes\n", " if (n.outdegree() != 1 or n.successors()[0].indegree() != 1):\n", " debug ('-------------- finished with node ' + str(n))\n", " break \n", " \n", " \n", " succ = n.successors()[0] # node is linear, so we are sure\n", " ### don't follow only self-reference:\n", " if n == succ: \n", " debug('------------------ hit a self reference, finished')\n", " break\n", " if succ['visited']:\n", " debug ('------------------ hit a loop here')\n", " break\n", " if n in n.successors() or n in succ.successors():\n", " debug('------------------- we hit a loop, finished')\n", " break\n", " \n", " ## Only the very first node in our path needs to keep the full k-mer, that is a node with in_degree 0\n", " ## All other need to be clipped now\n", " if n.indegree() > 0 and first_time:\n", " # otherwise we would artificially prolong the sequence\n", " # indegree 0 nodes always keep their sequence\n", " n['sequence'] = n['sequence'][self.k-2:]\n", " first_time = False\n", " else: \n", " first_time = False # but only once, otherwise we'd chop off the whole sequence\n", " \n", " debug(\"really compacting \" + n['sequence'] + ' --> ' + succ['sequence'] + str(n))\n", " n['visited'] = True\n", " eid = self.get_eid(n.index, succ.index) # geet the edge id\n", " ## add edges to all succ.successors to this node\n", " for s in succ.successors():\n", " self.add_edges([(n.index,s.index)]) ## add a new edge, \n", " new_eid = self.get_eid(n.index, s.index) ## and get the index of the edge we just made\n", " ## we need to copy the coverage attribute over, \n", " ## so get the edge connecting \n", " ## successor to its next successor\n", " succ_eid = self.get_eid(succ.index, s.index)\n", " self.es[new_eid]['coverage'] = self.es[succ_eid]['coverage'] # copy coverage \n", " ## get the successors single character and add to node sequence\n", " if succ[\"compacted\"]:\n", " ## add the sequence of the compacted node, it is already compacted\n", " n[\"sequence\"] += succ[\"sequence\"]\n", " n[\"sum\"] += succ[\"sum\"]\n", " n['dist'] += succ['dist'] \n", " else:\n", " n[\"sequence\"] += succ[\"sequence\"][-1:]\n", " if n['sum'] == None:\n", " error ('this should never happen: ' + str(n))\n", " assert self.es[eid]['coverage'], \"Every edge should have a coverage attribute\"\n", " assert n['sum'] != None, \"we set the coverage attribute already\"\n", " n['sum'] += self.es[eid]['coverage'] # here the coverage is in the edge\n", " n['dist'] += 1 # now we have made only a single step \n", " n[\"name\"] = n[\"sequence\"] \n", " n['label'] = '['+ str(n.index) +'] '+ n['sequence'] \n", " ## now delete the successor and the edge\n", " eid = self.get_eid(n.index, succ.index)\n", " self.delete_edges(eid)\n", " debug (\"deleting: \" + succ['name'])\n", " self.delete_vertices(succ.index)\n", " \n", " ## Now the compaction is complete, but we still have sum and dist per node, but not coverage, so we need to update\n", " ## the compacted nodes in a last sweep over all compacted nodes\n", " for n in self.vs: n['visited'] = False \n", " for n in self.vs.select(compacted = True):\n", " if (n['dist'] == 0): \n", " n['coverage'] = 1\n", " n['compacted'] = False ## these nodes were not really compacted, there were just marked \n", " else:\n", " n['coverage'] = n['sum']/n['dist']\n", " \n", " \n", " \n", " def viz(self):\n", " g = self\n", " layout = g.layout(\"kk\")\n", " color_dict = {True: \"lightgreen\", False: \"tomato\"}\n", " colors = [color_dict[c] for c in g.vs[\"compacted\"]]\n", " shapes = []\n", " visual_style = {}\n", " visual_style[\"edge_width\"] = [0.5 * int(coverage) for coverage in g.es[\"coverage\"]]\n", " visual_style[\"vertex_size\"] = [20 * int(coverage) for coverage in g.vs[\"coverage\"]]\n", " for s in g.vs.indegree():\n", " if s == 0:\n", " shapes.append(\"rectangle\")\n", " else:\n", " shapes.append(\"circle\")\n", " g.vs['label'] = list(map(lambda x: str(x.index)+':'+x['sequence'], g.vs)) \n", " return(ig.plot(g, bbox=(0, 0, 400, 400), vertex_color=colors, \n", " vertex_shape=shapes, vertex_label_dist = -0.5, layout=layout, **visual_style ))\n", "\n", " \n", "\n", " \n", " \n", " ## utility function to get the best node, strategy:\n", " ## ignore self reference\n", " ## 0. if no further node: None\n", " ## 1. if outdegree == 1, get the node\n", " ## 2. if outdegree > 0, prioritize:\n", " ## 1) get not visited node with highest coverage\n", " ## the user is responsible to mark node as visited if it is used\n", " ## we don't want to hit on an infinite loop, but if a visited node has an unvisited next node\n", " ## it could still be part of an Eulerian walk\n", " \n", " \"\"\"\n", " TODO: This function has much room for improvement. It may break up a perfectly Eulerian\n", " walk through the graph. Find an example of such a Eulerian graph topology. How could this be\n", " improved?\n", " \"\"\"\n", " def get_next_node(self, n):\n", " \n", " if (n.outdegree() < 1): return None\n", " if (n.outdegree() == 1):\n", " nn = n.successors()[0] ## there is only one, return it, but only if it is not self referential\n", " if n == nn: # or nn['visited']: # self reference or already visited\n", " ### this is sub-optimal, we cannot solve graph named \"double loop\" below\n", " ### like this properly, at least it protects against infinite loop\n", " ### in order to resolve double loops and alternative paths, we have to return \n", " ### nn in case it has an edge that we haven't visited yet\n", " return None\n", " if not all(list(map(lambda y: y['visited'] or nn == y, nn.successors()))):\n", " # checks, if of successor's successors has not been visited yet\n", " # slightly better, but still not optimal, one would want to define a \n", " # coverage threshold, at this point. However, such threshold should be\n", " # adaptive, depending on global or local coverage\n", " # Idea: calculate local coverage at the environment,\n", " # to use a node twice, each edge to traverse should have at least similar (e.g. 1/2?) coverage\n", " # compared to the local environment\n", " return nn\n", " else: \n", " return None\n", " \n", " nset = n.successors()\n", " tmp = list(filter (lambda x: not x['visited'], nset))\n", " if len(tmp) > 0: \n", " nn = sorted(tmp, key=lambda node: (n.outdegree(), node['coverage'], self.edge(n, node)['coverage'], len(node['sequence'] )), reverse=True)[0] \n", " if n == nn: # self reference\n", " return None\n", " else:\n", " return nn\n", " else: \n", " tmp = nset \n", " tmp = list(filter(lambda x: (not all(list(map(lambda y: y['visited'] or x == y, x.successors())))), tmp))\n", " \n", " if len(tmp) < 1: \n", " return None\n", " else:\n", " debug ('has unvisited next: ' + str(tmp)) \n", " tmp = sorted(tmp, key=lambda node: (node['coverage'], self.edge(n, node)['coverage'], len(node['sequence'] )), reverse=True) \n", " \n", " nn = tmp[0]\n", " if n == nn: # self reference\n", " return None\n", " else:\n", " return nn\n", " \n", " \"\"\"\n", " make a walk through the graph, given the start node\n", " \"\"\"\n", " def walk(self, n):\n", " sequence = n['sequence']\n", " n['visited'] = True\n", " n1 = n\n", " debug('walk: ' + n1['name'] + n1['sequence'])\n", " while self.get_next_node(n1):\n", " \n", " n1 = self.get_next_node(n1)\n", " debug('walk: ' + n1['name'])\n", " n1['visited'] = True\n", " if n1['compacted']:\n", " sequence += n1['sequence']\n", " debug ('seq: '+n1['sequence'])\n", " else:\n", " sequence += n1['sequence'][self.k-2]\n", " debug ('seq: '+n1['sequence'][self.k-2])\n", " info ('Done') \n", " return sequence \n", " \n", " \n", " \n", " \n", " \n", " ### The gredy assembly is the simplest possible heuristics and works on the compacted graph\n", " ### With greedy bubble correction and at every junction, follow the path of highest coverage\n", " ### No variants will be detected this way\n", " ### Dangling short ends will be cut off if they are shorter than min_contig_len, and there is another \n", " ### path > 2*k ahead, even if they have higher coverage \n", " ### If a repeat (a node that has already been visited) is hit with distance < min_contig_len, the contig will be dropped \n", " # - get all start nodes, easy ones first until all nodes have been visited\n", " def assemble(self):\n", " \n", " contigs = []\n", " min_contig_len = self.k ## very lenient, could be increased but ok for toy examples\n", " \n", " # get optimal start nodes first: optimal start nodes are those with indegree 0 and not visited\n", " counter = 0\n", " nset0 = self.vs.select( visited = False, _indegree = 0 )\n", " \n", " while counter <= len(nset0): # protect against infinite loop\n", " nset = self.vs.select(visited = False, _indegree = 0)\n", " # we could also sort the nodeset to refine our strategy further but we don't need that now\n", " if len(nset) < 1: break\n", " contig = self.walk(nset[0])\n", " if len(contig) >= min_contig_len:\n", " contigs.append(contig)\n", " \n", " # do the same for remaining other unvisited nodes\n", " nset0 = self.vs.select( visited = False)\n", " \n", " while counter <= len(nset0): # protect against infinite loop\n", " \n", " nset = self.vs.select(visited = False)\n", " # we could also sort the nodeset to refine our strategy further\n", " # as we might end up being somewhere in a loop, we could as well start with\n", " # the longest node\n", " nset = sorted(nset, key = lambda x: len(x['sequence']), reverse = True)\n", " if len(nset) < 1: break\n", " contig = self.walk(nset[0])\n", " if len(contig) >= min_contig_len:\n", " contigs.append(contig)\n", " \n", " self.contigs = contigs\n", " return contigs\n", " \n", " \n", " \n", " \n", " \n", " \n", " \"\"\"\n", " utility funciton to calculate the coverage per graph\n", " assume there is a real path between nodes\n", " \"\"\"\n", " def get_path_coverage(self, path):\n", " pathsum = 0\n", " pathlen = 0\n", " for i in range(len(path)):\n", " n = self.vs[path[i]]\n", " if (n['compacted']):\n", " pathsum += n['sum']\n", " pathlen += n['dist']\n", " else:\n", " if i < len(path)-1:\n", " pathsum += self.edge(self.vs[path[i]], self.vs[path[i+1]])['coverage']\n", " pathlen += 1\n", " \n", " if (pathlen > 0): \n", " return pathsum/pathlen\n", " else:\n", " return 0 \n", " \n", " ## simple bubble removal strategy:\n", " ## a bubble starts at a node with outdegree > 1\n", " ## it ends if the paths join again\n", " ## if the paths join at all\n", " ## if there are alternative paths to the confluent node, AND all nodes in a path have outdegree == 1\n", " ## then we have a real bubble: remove the nodes in the path(s) with lowest coverage\n", " ## however, we ignore the case for now that there could be divergent paths emerging from a lower \n", " ## coverage bubble. \n", " def remove_bubbles(self):\n", " max_look_ahead = 100 # maximum number of nodes to look ahead for a junction\n", " # first check if there is a bubble at all\n", " # we heavily use graph and set operations here\n", " nset0 = self.vs.select(_outdegree_gt = 1) # the maximum number of nodes to visit \n", " counter = 0 \n", " while (counter < len(nset0)): # max number of iterations possible because we only delete nodes\n", " counter = counter + 1\n", " ## we have to update our nodes each time, because nodes could have been deleted already\n", " nset = self.vs.select(_outdegree_gt = 1)\n", " if len (nset) < 1: break\n", " n = nset[0]\n", " nhoods = self.neighborhood(n.successors(), order=max_look_ahead, mode=\"out\") ## get nodeset in neighborhoods\n", " for i in range(len(nhoods)):\n", " for j in range(i+1, len(nhoods)):\n", " ## if the intersections are non-empty, there is a confluent node\n", " confluent_node = isect(nhoods[i],nhoods[j])\n", " if (confluent_node):\n", " info (str(counter) + ':' +' bubble detected:' + n['name'] +' ---> ' + self.vs[confluent_node]['name'])\n", " paths = list(map(lambda x: self.get_shortest_paths(x,confluent_node)[0], n.successors()))\n", " paths = sorted(paths, key= lambda x: (g.get_path_coverage(x),len(x))) # order paths by coverage, then by length\n", " debug(str(paths))\n", " keep_this = paths.pop()\n", " for path in paths:\n", " if n.index in path: continue ## this is a loop, we cannot deal with this case yet\n", " del_me = set(path) - set(keep_this) # get a set with shared nodes removed \n", " # this guarantees also that we do not accidentially delete nodes that are still needed\n", " # one could decide to only delete linear nodes\n", " del_me = list(filter(lambda x: self.vs[x].outdegree() == 1 , del_me))\n", " ## if the graph was compacted already this would delete only compacted nodes\n", " # del_me = list(filter(lambda x: self.vs[x].indegree() == 1 , del_me))\n", " \n", " info (\"deleting the following nodes: \"+ str(del_me))\n", " self.delete_vertices(del_me) # that will also clean the edges\n", " \n", " \n", " \n", " \n", "def isect(lst1, lst2):\n", " lst3 = [value for value in lst1 if value in lst2]\n", " if len(lst3) < 1: \n", " return None\n", " else:\n", " return lst3[0]\n", " \n", " \n", "class Node:\n", " id = 0\n", " graph = None\n", " inode = None\n", " def __init__(self, graph, id=None, sequence=None):\n", " self.graph = graph\n", " if sequence:\n", " self.inode = graph.vs.find(sequence=sequence)\n", " else: \n", " if id != None:\n", " self.inode = graph.vs[id] \n", " self.id = self.inode['id']\n", " \n", " def indegree(self):\n", " return self.inode.indegree()\n", " \n", " def outdegree(self):\n", " return self.inode.outdegree()" ] }, { "cell_type": "code", "execution_count": 2, "id": "cb64c924-3fca-402c-96d8-388d2dfa01d5", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:adding edge0 --> 1\n" ] } ], "source": [ "g = DBGraph('hey joe')\n", "\n", "#g.add_kmer('tttt')\n", "## bubble graph\n", "g.add('abb','bbb')" ] }, { "cell_type": "code", "execution_count": 3, "id": "019c49d1-127b-4d80-9e94-8ff612f070ad", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:adding edge0 --> 1\n", "DEBUG:root:adding edge1 --> 2\n", "DEBUG:root:adding edge2 --> 3\n", "DEBUG:root:adding edge3 --> 4\n", "DEBUG:root:adding edge3 --> 5\n", "DEBUG:root:adding edge5 --> 6\n", "DEBUG:root:adding edge6 --> 7\n", "DEBUG:root:adding edge6 --> 8\n", "DEBUG:root:adding edge8 --> 9\n", "DEBUG:root:adding edge9 --> 10\n", "DEBUG:root:adding edge4 --> 10\n", "DEBUG:root:adding edge1 --> 11\n", "DEBUG:root:adding edge1 --> 11\n", "DEBUG:root:adding edge1 --> 11\n", "DEBUG:root:adding edge11 --> 12\n", "DEBUG:root:adding edge11 --> 12\n", "DEBUG:root:adding edge12 --> 13\n", "DEBUG:root:adding edge12 --> 13\n", "DEBUG:root:adding edge13 --> 10\n", "DEBUG:root:adding edge10 --> 14\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 3, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "g = DBGraph('hey joe')\n", "\n", "#g.add_kmer('tttt')\n", "## bubble graph\n", "g.add('abb','bbb')\n", "g.add('bbb','bba')\n", "g.add('bba','bas')\n", "g.add('bas','asu')\n", "g.add('bas','ast')\n", "g.add('ast','stu')\n", "g.add('stu','tup')\n", "g.add('stu','tus')\n", "g.add('tus','usu')\n", "g.add('usu','suu')\n", "\n", "g.add('asu','suu')\n", "g.add('bbb','bbA')\n", "g.add('bbb','bbA')\n", "g.add('bbb','bbA')\n", "g.add('bbA','bAs')\n", "g.add('bbA','bAs')\n", "g.add('bAs','Asu')\n", "g.add('bAs','Asu')\n", "g.add('Asu','suu')\n", "g.add('suu','uuu')\n", "\n", "## alternative start and end nodes\n", "## g.add(\"NAT\",\"ATG\")\n", "# g.add(\"NAT\",\"ATG\")\n", "# g.add(\"NAT\",\"ATG\")\n", "# g.add(\"XAT\",\"ATG\")\n", "# g.add(\"ATG\", \"TGA\")\n", "# g.add(\"ATG\", \"TGA\")\n", "# g.add(\"ATG\", \"TGA\")\n", "# g.add(\"ATG\", \"TGA\")\n", "# g.add(\"ATG\", \"TGA\")\n", "\n", "g.viz()\n" ] }, { "cell_type": "code", "execution_count": 4, "id": "803e878d-7fb7-49db-a1e5-0040c37363a3", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:0: len(nset0)10\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 0, {'name': 'abb', 'id': 0, 'sequence': 'abb', 'label': '0:abb', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting abb --> bbbigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 0, {'name': 'abb', 'id': 0, 'sequence': 'abb', 'label': '0:abb', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: bbb\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 0, {'name': 'abbb', 'id': 0, 'sequence': 'abbb', 'label': '[0] abbb', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:1: len(nset0)9\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 1, {'name': 'bba', 'id': 2, 'sequence': 'bba', 'label': '2:bba', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting a --> basigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 1, {'name': 'bba', 'id': 2, 'sequence': 'a', 'label': '2:bba', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: bas\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 1, {'name': 'as', 'id': 2, 'sequence': 'as', 'label': '[1] as', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:2: len(nset0)8\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 2, {'name': 'asu', 'id': 4, 'sequence': 'asu', 'label': '4:asu', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 2, {'name': 'asu', 'id': 4, 'sequence': 'asu', 'label': '4:asu', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:3: len(nset0)7\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 3, {'name': 'ast', 'id': 5, 'sequence': 'ast', 'label': '5:ast', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting t --> stuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 3, {'name': 'ast', 'id': 5, 'sequence': 't', 'label': '5:ast', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: stu\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 3, {'name': 'tu', 'id': 5, 'sequence': 'tu', 'label': '[3] tu', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:4: len(nset0)6\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 5, {'name': 'tus', 'id': 8, 'sequence': 'tus', 'label': '8:tus', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting s --> usuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 5, {'name': 'tus', 'id': 8, 'sequence': 's', 'label': '8:tus', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: usu\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 5, {'name': 'su', 'id': 8, 'sequence': 'su', 'label': '[5] su', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:5: len(nset0)4\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 6, {'name': 'suu', 'id': 10, 'sequence': 'suu', 'label': '10:suu', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting u --> uuuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 6, {'name': 'suu', 'id': 10, 'sequence': 'u', 'label': '10:suu', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: uuu\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 6, {'name': 'uu', 'id': 10, 'sequence': 'uu', 'label': '[6] uu', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:6: len(nset0)3\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'bbA', 'id': 11, 'sequence': 'bbA', 'label': '11:bbA', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting A --> bAsigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'bbA', 'id': 11, 'sequence': 'A', 'label': '11:bbA', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: bAs\n", "DEBUG:root:really compacting As --> Asuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'As', 'id': 11, 'sequence': 'As', 'label': '[7] As', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 1})\n", "DEBUG:root:deleting: Asu\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'Asu', 'id': 11, 'sequence': 'Asu', 'label': '[7] Asu', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 2})\n", "DEBUG:root:7: len(nset0)0\n", "INFO:root:no more compactable stretches\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "g.compact()\n", "g.viz()" ] }, { "cell_type": "code", "execution_count": 5, "id": "bebc8e6e-0972-4cae-aab2-ff2f655ba20d", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:adding edge0 --> 1\n", "DEBUG:root:adding edge1 --> 2\n", "DEBUG:root:adding edge2 --> 0\n", "DEBUG:root:0: len(nset0)3\n", "DEBUG:root:seeking leftPZA\n", "DEBUG:root:seeking leftAPZ\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38250>, 1, {'name': 'APZ', 'id': 1, 'sequence': 'APZ', 'label': '1:APZ', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting Z --> PZAigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38250>, 1, {'name': 'APZ', 'id': 1, 'sequence': 'Z', 'label': '1:APZ', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: PZA\n", "DEBUG:root:------------------- we hit a loop, finished\n", "DEBUG:root:1: len(nset0)1\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38250>, 0, {'name': 'ZAP', 'id': 0, 'sequence': 'ZAP', 'label': '0:ZAP', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:------------------- we hit a loop, finished\n", "DEBUG:root:2: len(nset0)0\n", "INFO:root:no more compactable stretches\n", "DEBUG:root:walk: ZAPZAP\n", "DEBUG:root:walk: ZA\n", "DEBUG:root:seq: ZA\n", "INFO:root:Done\n" ] }, { "data": { "text/plain": [ "['ZAPZA']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g = DBGraph(\"cycle\")\n", "# cyclic\n", "g.add(\"ZAP\",\"APZ\")\n", "g.add(\"APZ\",\"PZA\")\n", "g.add(\"PZA\",\"ZAP\")\n", "## this must not give into infinite loop\n", "g.compact()\n", "g.assemble() # output: ['ZAPZA']" ] }, { "cell_type": "code", "execution_count": 6, "id": "9e0f85bb-8bf4-4a0d-bf1e-799fc229db92", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:adding edge0 --> 1\n", "DEBUG:root:adding edge1 --> 2\n", "DEBUG:root:adding edge2 --> 3\n", "DEBUG:root:adding edge3 --> 4\n", "DEBUG:root:adding edge4 --> 5\n", "DEBUG:root:adding edge5 --> 6\n", "DEBUG:root:adding edge6 --> 7\n", "DEBUG:root:adding edge7 --> 8\n", "DEBUG:root:adding edge8 --> 9\n", "DEBUG:root:adding edge9 --> 10\n", "DEBUG:root:adding edge10 --> 11\n", "DEBUG:root:adding edge12 --> 13\n", "DEBUG:root:adding edge13 --> 14\n", "DEBUG:root:adding edge14 --> 3\n", "DEBUG:root:adding edge3 --> 4\n", "DEBUG:root:adding edge4 --> 5\n", "DEBUG:root:adding edge5 --> 6\n", "DEBUG:root:adding edge6 --> 7\n", "DEBUG:root:adding edge7 --> 8\n", "DEBUG:root:adding edge8 --> 9\n", "DEBUG:root:adding edge9 --> 10\n", "DEBUG:root:adding edge10 --> 11\n", "DEBUG:root:adding edge15 --> 16\n", "DEBUG:root:adding edge16 --> 17\n", "DEBUG:root:adding edge17 --> 18\n", "DEBUG:root:adding edge18 --> 19\n", "DEBUG:root:adding edge19 --> 20\n", "DEBUG:root:adding edge20 --> 21\n", "DEBUG:root:adding edge21 --> 22\n", "DEBUG:root:adding edge22 --> 23\n", "DEBUG:root:adding edge23 --> 24\n", "DEBUG:root:adding edge24 --> 25\n", "DEBUG:root:adding edge25 --> 26\n", "DEBUG:root:adding edge26 --> 27\n", "DEBUG:root:adding edge27 --> 28\n", "DEBUG:root:adding edge28 --> 29\n", "DEBUG:root:adding edge29 --> 30\n", "DEBUG:root:adding edge30 --> 31\n", "DEBUG:root:adding edge31 --> 32\n", "DEBUG:root:adding edge32 --> 33\n", "DEBUG:root:adding edge33 --> 34\n", "DEBUG:root:adding edge34 --> 35\n", "DEBUG:root:adding edge35 --> 36\n", "DEBUG:root:adding edge36 --> 37\n", "DEBUG:root:adding edge37 --> 38\n", "DEBUG:root:adding edge38 --> 39\n", "DEBUG:root:adding edge39 --> 40\n", "DEBUG:root:adding edge40 --> 41\n", "DEBUG:root:adding edge41 --> 42\n", "DEBUG:root:adding edge42 --> 43\n", "DEBUG:root:adding edge43 --> 44\n", "DEBUG:root:adding edge44 --> 45\n", "DEBUG:root:adding edge45 --> 15\n", "DEBUG:root:adding edge15 --> 16\n", "DEBUG:root:adding edge16 --> 46\n", "DEBUG:root:adding edge46 --> 47\n", "DEBUG:root:adding edge47 --> 48\n", "DEBUG:root:adding edge48 --> 49\n", "DEBUG:root:adding edge49 --> 50\n", "DEBUG:root:adding edge50 --> 51\n", "DEBUG:root:adding edge51 --> 52\n", "DEBUG:root:adding edge52 --> 53\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 6, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "# test construction from file\n", "g = DBGraph('file')\n", "g.add_from_file('test.fasta')\n", "g.viz()" ] }, { "cell_type": "code", "execution_count": 7, "id": "f7532d51-e4f3-45e7-a6fd-657a372d99f0", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:0: len(nset0)51\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abc'), 'id': 0, 'sequence': Seq('abc'), 'label': Seq('0:abc'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting abc --> bcdigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abc'), 'id': 0, 'sequence': Seq('abc'), 'label': Seq('0:abc'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: bcd\n", "DEBUG:root:really compacting abcd --> cdeigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abcd'), 'id': 0, 'sequence': Seq('abcd'), 'label': Seq('[0] abcd'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:deleting: cde\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abcde'), 'id': 0, 'sequence': Seq('abcde'), 'label': Seq('[0] abcde'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n", "DEBUG:root:1: len(nset0)48\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('def'), 'id': 3, 'sequence': Seq('def'), 'label': Seq('3:def'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting f --> efgigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('def'), 'id': 3, 'sequence': Seq('f'), 'label': Seq('3:def'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: efg\n", "DEBUG:root:really compacting fg --> fghigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fg'), 'id': 3, 'sequence': Seq('fg'), 'label': Seq('[1] fg'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 1})\n", "DEBUG:root:deleting: fgh\n", "DEBUG:root:really compacting fgh --> ghiigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fgh'), 'id': 3, 'sequence': Seq('fgh'), 'label': Seq('[1] fgh'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 2})\n", "DEBUG:root:deleting: ghi\n", "DEBUG:root:really compacting fghi --> hijigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghi'), 'id': 3, 'sequence': Seq('fghi'), 'label': Seq('[1] fghi'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 6, 'dist': 3})\n", "DEBUG:root:deleting: hij\n", "DEBUG:root:really compacting fghij --> ijkigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghij'), 'id': 3, 'sequence': Seq('fghij'), 'label': Seq('[1] fghij'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 8, 'dist': 4})\n", "DEBUG:root:deleting: ijk\n", "DEBUG:root:really compacting fghijk --> jkligraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijk'), 'id': 3, 'sequence': Seq('fghijk'), 'label': Seq('[1] fghijk'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 10, 'dist': 5})\n", "DEBUG:root:deleting: jkl\n", "DEBUG:root:really compacting fghijkl --> klmigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijkl'), 'id': 3, 'sequence': Seq('fghijkl'), 'label': Seq('[1] fghijkl'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 12, 'dist': 6})\n", "DEBUG:root:deleting: klm\n", "DEBUG:root:really compacting fghijklm --> lmnigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijklm'), 'id': 3, 'sequence': Seq('fghijklm'), 'label': Seq('[1] fghijklm'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 14, 'dist': 7})\n", "DEBUG:root:deleting: lmn\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijklmn'), 'id': 3, 'sequence': Seq('fghijklmn'), 'label': Seq('[1] fghijklmn'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 16, 'dist': 8})\n", "DEBUG:root:2: len(nset0)40\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abd'), 'id': 12, 'sequence': Seq('abd'), 'label': Seq('12:abd'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting abd --> bddigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abd'), 'id': 12, 'sequence': Seq('abd'), 'label': Seq('12:abd'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: bdd\n", "DEBUG:root:really compacting abdd --> ddeigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abdd'), 'id': 12, 'sequence': Seq('abdd'), 'label': Seq('[2] abdd'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:deleting: dde\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abdde'), 'id': 12, 'sequence': Seq('abdde'), 'label': Seq('[2] abdde'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n", "DEBUG:root:3: len(nset0)37\n", "DEBUG:root:seeking left_th\n", "DEBUG:root:seeking leftr_t\n", "DEBUG:root:seeking lefter_\n", "DEBUG:root:seeking leftver\n", "DEBUG:root:seeking leftove\n", "DEBUG:root:seeking left_ov\n", "DEBUG:root:seeking lefts_o\n", "DEBUG:root:seeking leftps_\n", "DEBUG:root:seeking leftmps\n", "DEBUG:root:seeking leftump\n", "DEBUG:root:seeking leftjum\n", "DEBUG:root:seeking left_ju\n", "DEBUG:root:seeking leftx_j\n", "DEBUG:root:seeking leftox_\n", "DEBUG:root:seeking leftfox\n", "DEBUG:root:seeking left_fo\n", "DEBUG:root:seeking leftn_f\n", "DEBUG:root:seeking leftwn_\n", "DEBUG:root:seeking leftown\n", "DEBUG:root:seeking leftrow\n", "DEBUG:root:seeking leftbro\n", "DEBUG:root:seeking left_br\n", "DEBUG:root:seeking leftk_b\n", "DEBUG:root:seeking leftck_\n", "DEBUG:root:seeking leftick\n", "DEBUG:root:seeking leftuic\n", "DEBUG:root:seeking leftqui\n", "DEBUG:root:seeking left_qu\n", "DEBUG:root:seeking lefte_q\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_q'), 'id': 17, 'sequence': Seq('e_q'), 'label': Seq('17:e_q'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting q --> _quigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_q'), 'id': 17, 'sequence': Seq('q'), 'label': Seq('17:e_q'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: _qu\n", "DEBUG:root:really compacting qu --> quiigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('qu'), 'id': 17, 'sequence': Seq('qu'), 'label': Seq('[5] qu'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:deleting: qui\n", "DEBUG:root:really compacting qui --> uicigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('qui'), 'id': 17, 'sequence': Seq('qui'), 'label': Seq('[5] qui'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n", "DEBUG:root:deleting: uic\n", "DEBUG:root:really compacting quic --> ickigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quic'), 'id': 17, 'sequence': Seq('quic'), 'label': Seq('[5] quic'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 3})\n", "DEBUG:root:deleting: ick\n", "DEBUG:root:really compacting quick --> ck_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick'), 'id': 17, 'sequence': Seq('quick'), 'label': Seq('[5] quick'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 4})\n", "DEBUG:root:deleting: ck_\n", "DEBUG:root:really compacting quick_ --> k_bigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_'), 'id': 17, 'sequence': Seq('quick_'), 'label': Seq('[5] quick_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 5, 'dist': 5})\n", "DEBUG:root:deleting: k_b\n", "DEBUG:root:really compacting quick_b --> _brigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_b'), 'id': 17, 'sequence': Seq('quick_b'), 'label': Seq('[5] quick_b'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 6, 'dist': 6})\n", "DEBUG:root:deleting: _br\n", "DEBUG:root:really compacting quick_br --> broigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_br'), 'id': 17, 'sequence': Seq('quick_br'), 'label': Seq('[5] quick_br'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 7, 'dist': 7})\n", "DEBUG:root:deleting: bro\n", "DEBUG:root:really compacting quick_bro --> rowigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_bro'), 'id': 17, 'sequence': Seq('quick_bro'), 'label': Seq('[5] quick_bro'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 8, 'dist': 8})\n", "DEBUG:root:deleting: row\n", "DEBUG:root:really compacting quick_brow --> ownigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brow'), 'id': 17, 'sequence': Seq('quick_brow'), 'label': Seq('[5] quick_brow'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 9, 'dist': 9})\n", "DEBUG:root:deleting: own\n", "DEBUG:root:really compacting quick_brown --> wn_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown'), 'id': 17, 'sequence': Seq('quick_brown'), 'label': Seq('[5] quick_brown'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 10, 'dist': 10})\n", "DEBUG:root:deleting: wn_\n", "DEBUG:root:really compacting quick_brown_ --> n_figraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_'), 'id': 17, 'sequence': Seq('quick_brown_'), 'label': Seq('[5] quick_brown_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 11, 'dist': 11})\n", "DEBUG:root:deleting: n_f\n", "DEBUG:root:really compacting quick_brown_f --> _foigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_f'), 'id': 17, 'sequence': Seq('quick_brown_f'), 'label': Seq('[5] quick_brown_f'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 12, 'dist': 12})\n", "DEBUG:root:deleting: _fo\n", "DEBUG:root:really compacting quick_brown_fo --> foxigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fo'), 'id': 17, 'sequence': Seq('quick_brown_fo'), 'label': Seq('[5] quick_brown_fo'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 13, 'dist': 13})\n", "DEBUG:root:deleting: fox\n", "DEBUG:root:really compacting quick_brown_fox --> ox_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox'), 'id': 17, 'sequence': Seq('quick_brown_fox'), 'label': Seq('[5] quick_brown_fox'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 14, 'dist': 14})\n", "DEBUG:root:deleting: ox_\n", "DEBUG:root:really compacting quick_brown_fox_ --> x_jigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_'), 'id': 17, 'sequence': Seq('quick_brown_fox_'), 'label': Seq('[5] quick_brown_fox_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 15, 'dist': 15})\n", "DEBUG:root:deleting: x_j\n", "DEBUG:root:really compacting quick_brown_fox_j --> _juigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_j'), 'id': 17, 'sequence': Seq('quick_brown_fox_j'), 'label': Seq('[5] quick_brown_fox_j'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 16, 'dist': 16})\n", "DEBUG:root:deleting: _ju\n", "DEBUG:root:really compacting quick_brown_fox_ju --> jumigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_ju'), 'id': 17, 'sequence': Seq('quick_brown_fox_ju'), 'label': Seq('[5] quick_brown_fox_ju'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 17, 'dist': 17})\n", "DEBUG:root:deleting: jum\n", "DEBUG:root:really compacting quick_brown_fox_jum --> umpigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jum'), 'id': 17, 'sequence': Seq('quick_brown_fox_jum'), 'label': Seq('[5] quick_brown_fox_jum'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 18, 'dist': 18})\n", "DEBUG:root:deleting: ump\n", "DEBUG:root:really compacting quick_brown_fox_jump --> mpsigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jump'), 'id': 17, 'sequence': Seq('quick_brown_fox_jump'), 'label': Seq('[5] quick_brown_fox_jump'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 19, 'dist': 19})\n", "DEBUG:root:deleting: mps\n", "DEBUG:root:really compacting quick_brown_fox_jumps --> ps_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps'), 'label': Seq('[5] quick_brown_fox_jumps'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 20, 'dist': 20})\n", "DEBUG:root:deleting: ps_\n", "DEBUG:root:really compacting quick_brown_fox_jumps_ --> s_oigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_'), 'label': Seq('[5] quick_brown_fox_jumps_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 21, 'dist': 21})\n", "DEBUG:root:deleting: s_o\n", "DEBUG:root:really compacting quick_brown_fox_jumps_o --> _ovigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_o'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_o'), 'label': Seq('[5] quick_brown_fox_jumps_o'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 22, 'dist': 22})\n", "DEBUG:root:deleting: _ov\n", "DEBUG:root:really compacting quick_brown_fox_jumps_ov --> oveigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_ov'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_ov'), 'label': Seq('[5] quick_brown_fox_jumps_ov'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 23, 'dist': 23})\n", "DEBUG:root:deleting: ove\n", "DEBUG:root:really compacting quick_brown_fox_jumps_ove --> verigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_ove'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_ove'), 'label': Seq('[5] quick_brown_fox_jumps_ove'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 24, 'dist': 24})\n", "DEBUG:root:deleting: ver\n", "DEBUG:root:really compacting quick_brown_fox_jumps_over --> er_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over'), 'label': Seq('[5] quick_brown_fox_jumps_over'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 25, 'dist': 25})\n", "DEBUG:root:deleting: er_\n", "DEBUG:root:really compacting quick_brown_fox_jumps_over_ --> r_tigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over_'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over_'), 'label': Seq('[5] quick_brown_fox_jumps_over_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 26, 'dist': 26})\n", "DEBUG:root:deleting: r_t\n", "DEBUG:root:really compacting quick_brown_fox_jumps_over_t --> _thigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over_t'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over_t'), 'label': Seq('[5] quick_brown_fox_jumps_over_t'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 27, 'dist': 27})\n", "DEBUG:root:deleting: _th\n", "DEBUG:root:really compacting quick_brown_fox_jumps_over_th --> theigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over_th'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over_th'), 'label': Seq('[5] quick_brown_fox_jumps_over_th'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 28, 'dist': 28})\n", "DEBUG:root:deleting: the\n", "DEBUG:root:------------------- we hit a loop, finished\n", "DEBUG:root:4: len(nset0)7\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_l'), 'id': 46, 'sequence': Seq('e_l'), 'label': Seq('46:e_l'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting l --> _laigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_l'), 'id': 46, 'sequence': Seq('l'), 'label': Seq('46:e_l'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: _la\n", "DEBUG:root:really compacting la --> lazigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('la'), 'id': 46, 'sequence': Seq('la'), 'label': Seq('[5] la'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:deleting: laz\n", "DEBUG:root:really compacting laz --> azyigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('laz'), 'id': 46, 'sequence': Seq('laz'), 'label': Seq('[5] laz'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n", "DEBUG:root:deleting: azy\n", "DEBUG:root:really compacting lazy --> zy_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy'), 'id': 46, 'sequence': Seq('lazy'), 'label': Seq('[5] lazy'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 3})\n", "DEBUG:root:deleting: zy_\n", "DEBUG:root:really compacting lazy_ --> y_digraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_'), 'id': 46, 'sequence': Seq('lazy_'), 'label': Seq('[5] lazy_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 4})\n", "DEBUG:root:deleting: y_d\n", "DEBUG:root:really compacting lazy_d --> _doigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_d'), 'id': 46, 'sequence': Seq('lazy_d'), 'label': Seq('[5] lazy_d'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 5, 'dist': 5})\n", "DEBUG:root:deleting: _do\n", "DEBUG:root:really compacting lazy_do --> dogigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_do'), 'id': 46, 'sequence': Seq('lazy_do'), 'label': Seq('[5] lazy_do'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 6, 'dist': 6})\n", "DEBUG:root:deleting: dog\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_dog'), 'id': 46, 'sequence': Seq('lazy_dog'), 'label': Seq('[5] lazy_dog'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 7, 'dist': 7})\n", "DEBUG:root:5: len(nset0)0\n", "INFO:root:no more compactable stretches\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "g.compact()\n", "g.viz()" ] }, { "cell_type": "code", "execution_count": 8, "id": "3c8389d8-f81c-45d1-aea6-29817e010217", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:root:1: bubble detected:he_ ---> lazy_dog\n", "DEBUG:root:[[5], [4, 3, 5]]\n", "INFO:root:deleting the following nodes: []\n", "DEBUG:root:walk: abcdeabcde\n", "DEBUG:root:walk: fghijklmn\n", "DEBUG:root:seq: fghijklmn\n", "INFO:root:Done\n", "DEBUG:root:walk: abddeabdde\n", "INFO:root:Done\n", "DEBUG:root:walk: quick_brown_fox_jumps_over_thequick_brown_fox_jumps_over_the\n", "DEBUG:root:walk: he_\n", "DEBUG:root:seq: _\n", "DEBUG:root:walk: lazy_dog\n", "DEBUG:root:seq: lazy_dog\n", "INFO:root:Done\n" ] }, { "data": { "text/plain": [ "[Seq('abcdefghijklmn'),\n", " Seq('abdde'),\n", " Seq('quick_brown_fox_jumps_over_the_lazy_dog')]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g.remove_bubbles()\n", "g.assemble() ## [Seq('12345678'), Seq('abcdefghijklmnopqrstuvwXYZ')] \n", "# output: [Seq('abcdefghijklmn'),\n", "# Seq('abdde'),\n", "# Seq('quick_brown_fox_jumps_over_the_lazy_dog')]\n", "# no bubble will be deleted because it is indeed a loop" ] }, { "cell_type": "code", "execution_count": 9, "id": "51090d0b-3457-46fd-ac00-f74a643e8269", "metadata": { "scrolled": true, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:adding edge0 --> 1\n", "DEBUG:root:adding edge1 --> 2\n", "DEBUG:root:adding edge1 --> 2\n", "DEBUG:root:adding edge1 --> 2\n", "DEBUG:root:adding edge1 --> 2\n", "DEBUG:root:adding edge2 --> 3\n", "DEBUG:root:adding edge2 --> 4\n", "DEBUG:root:adding edge5 --> 6\n", "DEBUG:root:adding edge5 --> 6\n", "DEBUG:root:adding edge6 --> 7\n", "DEBUG:root:adding edge8 --> 8\n", "DEBUG:root:adding edge8 --> 8\n", "DEBUG:root:adding edge8 --> 8\n", "DEBUG:root:adding edge8 --> 8\n", "DEBUG:root:adding edge8 --> 8\n", "DEBUG:root:adding edge8 --> 9\n", "DEBUG:root:adding edge8 --> 9\n", "DEBUG:root:adding edge9 --> 10\n", "DEBUG:root:adding edge10 --> 11\n", "DEBUG:root:adding edge11 --> 8\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ " \n", "\n", " \n", "g = DBGraph('test more stuff')\n", "g.add(\"TGA\",\"GAA\")\n", "g.add(\"GAA\",\"AAT\")\n", "g.add(\"GAA\",\"AAT\")\n", "g.add(\"GAA\",\"AAT\")\n", "g.add(\"GAA\",\"AAT\")\n", "\n", "g.add(\"AAT\",\"ATT\")\n", "g.add(\"AAT\",\"ATC\")\n", "\n", "# short linear graph\n", "g.add(\"POP\",\"OPA\")\n", "g.add('POP','OPA')\n", "g.add(\"OPA\",\"PAM\")\n", "\n", "\n", "\n", "## self repeat + cyclic graph\n", "g.add(\"XXX\",\"XXX\")\n", "g.add(\"XXX\",\"XXX\")\n", "g.add(\"XXX\",\"XXX\")\n", "g.add(\"XXX\",\"XXX\")\n", "g.add(\"XXX\",\"XXX\")\n", "g.add(\"XXX\",\"XXY\")\n", "g.add(\"XXX\",\"XXY\")\n", "\n", "g.add(\"XXY\",\"XYX\")\n", "g.add(\"XYX\",\"YXX\")\n", "g.add(\"YXX\",\"XXX\")\n", "g.viz()" ] }, { "cell_type": "code", "execution_count": 10, "id": "3bbab911-a050-4ffa-a2c9-f0af11c4d6d4", "metadata": { "scrolled": true, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:0: len(nset0)7\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGA', 'id': 0, 'sequence': 'TGA', 'label': '0:TGA', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting TGA --> GAAigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGA', 'id': 0, 'sequence': 'TGA', 'label': '0:TGA', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: GAA\n", "DEBUG:root:really compacting TGAA --> AATigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGAA', 'id': 0, 'sequence': 'TGAA', 'label': '[0] TGAA', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:deleting: AAT\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGAAT', 'id': 0, 'sequence': 'TGAAT', 'label': '[0] TGAAT', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 5, 'dist': 2})\n", "DEBUG:root:1: len(nset0)5\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POP', 'id': 5, 'sequence': 'POP', 'label': '5:POP', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting POP --> OPAigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POP', 'id': 5, 'sequence': 'POP', 'label': '5:POP', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: OPA\n", "DEBUG:root:really compacting POPA --> PAMigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POPA', 'id': 5, 'sequence': 'POPA', 'label': '[3] POPA', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 1})\n", "DEBUG:root:deleting: PAM\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POPAM', 'id': 5, 'sequence': 'POPAM', 'label': '[3] POPAM', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 2})\n", "DEBUG:root:2: len(nset0)3\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'XXY', 'id': 9, 'sequence': 'XXY', 'label': '9:XXY', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting Y --> XYXigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'XXY', 'id': 9, 'sequence': 'Y', 'label': '9:XXY', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: XYX\n", "DEBUG:root:really compacting YX --> YXXigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'YX', 'id': 9, 'sequence': 'YX', 'label': '[5] YX', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:deleting: YXX\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'YXX', 'id': 9, 'sequence': 'YXX', 'label': '[5] YXX', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n", "DEBUG:root:3: len(nset0)0\n", "INFO:root:no more compactable stretches\n", "DEBUG:root:walk: TGAATTGAAT\n", "DEBUG:root:walk: ATT\n", "DEBUG:root:seq: T\n", "INFO:root:Done\n", "DEBUG:root:walk: POPAMPOPAM\n", "INFO:root:Done\n", "DEBUG:root:walk: ATCATC\n", "INFO:root:Done\n", "DEBUG:root:walk: XXXXXX\n", "DEBUG:root:walk: YXX\n", "DEBUG:root:seq: YXX\n", "INFO:root:Done\n" ] }, { "data": { "text/plain": [ "['TGAATT', 'POPAM', 'XXXYXX']" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g.compact()\n", "g.assemble() # should give: ['TGAATT', 'POPAM', 'XXXYXX']\n", " \n", " " ] }, { "cell_type": "code", "execution_count": 11, "id": "14edba89-d3d9-4376-b704-b95e4b397992", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:adding edge0 --> 1\n", "DEBUG:root:adding edge1 --> 2\n", "DEBUG:root:adding edge2 --> 3\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "g = DBGraph('hey joe')\n", "g.add('123','234')\n", "g.add('234','345')\n", "g.add('345','456')\n", "g.viz()\n" ] }, { "cell_type": "code", "execution_count": 12, "id": "435eab49-0949-4a85-9def-902e46f6209d", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:walk: 123123\n", "DEBUG:root:walk: 234\n", "DEBUG:root:seq: 4\n", "DEBUG:root:walk: 345\n", "DEBUG:root:seq: 5\n", "DEBUG:root:walk: 456\n", "DEBUG:root:seq: 6\n", "INFO:root:Done\n" ] }, { "data": { "text/plain": [ "['123456']" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g.assemble() # simple graphs can be assembled directly: 123456\n" ] }, { "cell_type": "code", "execution_count": 13, "id": "a421dc34-cfe7-463d-87dd-8bf623d24e6f", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:0: len(nset0)3\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '123', 'id': 0, 'sequence': '123', 'label': '0:123', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting 123 --> 234igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '123', 'id': 0, 'sequence': '123', 'label': '0:123', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: 234\n", "DEBUG:root:really compacting 1234 --> 345igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '1234', 'id': 0, 'sequence': '1234', 'label': '[0] 1234', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:deleting: 345\n", "DEBUG:root:really compacting 12345 --> 456igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '12345', 'id': 0, 'sequence': '12345', 'label': '[0] 12345', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n", "DEBUG:root:deleting: 456\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '123456', 'id': 0, 'sequence': '123456', 'label': '[0] 123456', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 3})\n", "DEBUG:root:1: len(nset0)0\n", "INFO:root:no more compactable stretches\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 13, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "g.compact()\n", "g.viz()\n", "\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 14, "id": "c5ab5086-11b5-4ab3-89e5-b12af36148fb", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:adding edge0 --> 1\n", "DEBUG:root:adding edge1 --> 2\n", "DEBUG:root:adding edge2 --> 3\n", "DEBUG:root:adding edge2 --> 4\n", "DEBUG:root:adding edge3 --> 5\n", "DEBUG:root:adding edge4 --> 6\n", "DEBUG:root:adding edge5 --> 7\n", "DEBUG:root:adding edge6 --> 8\n", "DEBUG:root:adding edge7 --> 1\n", "DEBUG:root:adding edge8 --> 1\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 14, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "g = DBGraph('double loop')\n", "g.add('0ab','abc')\n", "g.add('abc','bcd')\n", "g.add('bcd','cde')\n", "g.add('bcd','cdE')\n", "g.add('cde','dea')\n", "g.add('cdE','dEa')\n", "g.add('dea','eab')\n", "g.add('dEa','Eab')\n", "g.add('eab','abc')\n", "g.add('Eab','abc')\n", "g.viz()\n" ] }, { "cell_type": "code", "execution_count": 15, "id": "513dde3e-f4eb-490d-bcf6-abee1382f6c3", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:0: len(nset0)8\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 0, {'name': '0ab', 'id': 0, 'sequence': '0ab', 'label': '0:0ab', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 0, {'name': '0ab', 'id': 0, 'sequence': '0ab', 'label': '0:0ab', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:1: len(nset0)7\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'abc', 'id': 1, 'sequence': 'abc', 'label': '1:abc', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting c --> bcdigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'abc', 'id': 1, 'sequence': 'c', 'label': '1:abc', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: bcd\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'cd', 'id': 1, 'sequence': 'cd', 'label': '[1] cd', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:2: len(nset0)6\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'cde', 'id': 3, 'sequence': 'cde', 'label': '3:cde', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting e --> deaigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'cde', 'id': 3, 'sequence': 'e', 'label': '3:cde', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: dea\n", "DEBUG:root:really compacting ea --> eabigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'ea', 'id': 3, 'sequence': 'ea', 'label': '[2] ea', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:deleting: eab\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'eab', 'id': 3, 'sequence': 'eab', 'label': '[2] eab', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n", "DEBUG:root:3: len(nset0)3\n", "DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'cdE', 'id': 4, 'sequence': 'cdE', 'label': '4:cdE', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n", "DEBUG:root:really compacting E --> dEaigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'cdE', 'id': 4, 'sequence': 'E', 'label': '4:cdE', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n", "DEBUG:root:deleting: dEa\n", "DEBUG:root:really compacting Ea --> Eabigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'Ea', 'id': 4, 'sequence': 'Ea', 'label': '[3] Ea', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n", "DEBUG:root:deleting: Eab\n", "DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'Eab', 'id': 4, 'sequence': 'Eab', 'label': '[3] Eab', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n", "DEBUG:root:4: len(nset0)0\n", "INFO:root:no more compactable stretches\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "g.compact()\n", "g.viz()" ] }, { "cell_type": "code", "execution_count": 16, "id": "e0840c51-cf2a-4370-ab3e-ca935842afc4", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "INFO:root:1: bubble detected:cd ---> eab\n", "DEBUG:root:[[2], [3, 1, 2]]\n", "INFO:root:deleting the following nodes: []\n" ] }, { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": { "image/svg+xml": { "isolated": true } }, "output_type": "execute_result" } ], "source": [ "g.remove_bubbles()\n", "g.viz()" ] }, { "cell_type": "code", "execution_count": 17, "id": "94913ca2-943a-4d8a-b78f-8327ed6cb1c2", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "DEBUG:root:walk: 0ab0ab\n", "DEBUG:root:walk: cd\n", "DEBUG:root:seq: cd\n", "DEBUG:root:walk: eab\n", "DEBUG:root:seq: eab\n", "DEBUG:root:has unvisited next: [igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'cd', 'id': 1, 'sequence': 'cd', 'label': '1:cd', 'visited': True, 'compacted': True, 'coverage': 1.0, 'sum': 1, 'dist': 1})]\n", "DEBUG:root:has unvisited next: [igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'cd', 'id': 1, 'sequence': 'cd', 'label': '1:cd', 'visited': True, 'compacted': True, 'coverage': 1.0, 'sum': 1, 'dist': 1})]\n", "DEBUG:root:walk: cd\n", "DEBUG:root:seq: cd\n", "DEBUG:root:walk: Eab\n", "DEBUG:root:seq: Eab\n", "INFO:root:Done\n" ] }, { "data": { "text/plain": [ "['0abcdeabcdEab']" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g.assemble() # should give: ['0abcdeabcdEab']" ] }, { "cell_type": "code", "execution_count": null, "id": "8126ad58-9951-4e3f-bc58-5981b6fc3211", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "dc39e14c-30eb-42d9-b4bc-d279cfd2223d", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.12" } }, "nbformat": 4, "nbformat_minor": 5 }