{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "4d22101e-2686-42a9-91fd-ab29eb4555be",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"import igraph as ig\n",
"from igraph import Graph\n",
"from Bio import SeqIO\n",
"import logging\n",
"from logging import debug, info, warning, error\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.DEBUG)\n",
"\n",
"\n",
"class DBGraph(Graph):\n",
" newid = 0 # allow for a fixed id per node\n",
" k = 4 # we need to know the k-mer size (note: the length of the sequence per node is k-1)\n",
" #igraph = None\n",
" name = \"\"\n",
" compacted = False\n",
" contigs = []\n",
" \n",
" def __init__(self, name=\"\", k=4):\n",
" self.name = name\n",
" self.k = k\n",
" super().__init__(directed=True)\n",
"\n",
" \n",
" \n",
" \n",
" \n",
" # utility function to quickly retrieve the edge between two igraph nodes\n",
" # this function should exist in igraph itself\n",
" def edge(self, n1, n2):\n",
" gl = self\n",
" return gl.es[gl.get_eid(n1.index, n2.index)]\n",
" \n",
" \"\"\"\n",
" \n",
" utility functions: adds the node sequences to the graph\n",
" This one initialized a fresh node if the sequence is not already there.\n",
" \n",
" \"\"\"\n",
" \n",
" def find_or_fresh_node(self, s):\n",
" new_node = None\n",
" g = self\n",
" try:\n",
" new_node = g.vs.find(sequence=s) \n",
" except: \n",
" if not new_node:\n",
" new_node = g.add_vertex(name=s)\n",
" new_node['id'] = self.newid # we need to keep a fixed id to reference nodes if other nodes are deleted\n",
" new_node['sequence'] = s\n",
" new_node['label'] = str(new_node.index) +':'+ s\n",
" new_node['visited'] = False\n",
" new_node['compacted'] = False\n",
" new_node['coverage'] = 1\n",
" new_node['sum'] = 0\n",
" new_node['dist'] = 0\n",
" \n",
" assert new_node, 'Failed to create new node!'\n",
" self.newid += 1 \n",
" return new_node\n",
" \n",
" \n",
" def add(self, s1, s2):\n",
" if self.compacted:\n",
" raise Exception(\"New nodes cannot be added to a compacted graph.\")\n",
" g = self\n",
" n1 = self.find_or_fresh_node(s1) \n",
" n2 = self.find_or_fresh_node(s2) \n",
" \n",
" i1 = n1.index\n",
" i2 = n2.index\n",
" debug (\"adding edge\"+str(i1)+\" --> \"+str(i2))\n",
" eid = None\n",
" try: # the edge could be there already\n",
" eid = g.get_eid(i1,i2)\n",
" g.es[eid][\"coverage\"] += 1 ## in a true DeBrujn Graph the edges carry the multiplicity!\n",
" except: # the edge is new\n",
" g.add_edges([(i1,i2)])\n",
" eid = g.get_eid(i1,i2)\n",
" g.es[eid][\"coverage\"] = 1 \n",
" \n",
" def add_kmer(self,kmer):\n",
" assert len(kmer) == self.k, \"kmer sequence must have length k\" \n",
" self.add(kmer[0:self.k-1], kmer[1:self.k])\n",
" \n",
" def add_from_file(self, file): \n",
" for record in SeqIO.parse(file, 'fasta'):\n",
" i = 0\n",
" while i < (len(record.seq) - self.k + 1):\n",
" self.add_kmer(record.seq[i:i+self.k])\n",
" i += 1\n",
" \n",
"\n",
" def get_node0(self):\n",
" return Node(graph=self, id=0)\n",
" \n",
" \"\"\"\n",
" compact the graph by compressing linear stretches of nodes into a single node\n",
" and deleting the redundant nodes\n",
" conserves coverage by computing the average coverage for the compacted nodes\n",
" and stores it in the node\n",
" That leads to a much more compact representation and reduces the effort of \n",
" subsequenct filtering and contig generation\n",
" Note that no more k-mers can be added after compaction, because the original \n",
" k-mers can no longer be found by dictionary lookup\n",
" \"\"\"\n",
" \n",
" \n",
" def compact(self):\n",
" if self.compacted:\n",
" raise Exception(\"Compaction can only be run once per graph\")\n",
" self.compacted = True\n",
" ## look for simple 0 start nodes first\n",
" ## only nodes with outdegree can be compacted \n",
" # for ni in nset0: print(ni[\"name\"])\n",
" i = 0\n",
" # update set of not yet compacted nodes after each iteration\n",
" while True: # this needs to be done this way, otherwise the set is not updated correctly \n",
" nset0 = self.vs.select(_outdegree_eq = 1, compacted = False)\n",
" debug(str (i) + ': len(nset0)' + str(len(nset0)) )\n",
" if len(nset0) < 1: \n",
" info('no more compactable stretches')\n",
" break\n",
" n = nset0[0]\n",
" i += 1\n",
" ### we want to walk to the left-most position, before engaging with the compaction\n",
" n['visited'] = True\n",
" while (n.indegree() == 1 and n.predecessors()[0].outdegree() == 1 and not n.predecessors()[0]['visited']):\n",
" # move left \n",
" n = n.predecessors()[0]\n",
" n['visited'] = True # avoid infinite loop\n",
" debug('seeking left' + n['sequence'] )\n",
" for noob in self.vs: noob['visited'] = False # set it back\n",
" debug (\"Start compacting: \" + str(n) + str(n.outdegree())) \n",
" assert n.outdegree() == 1 and (not n['compacted']), \"We hit an already compacted node!\" # this should never happen because of select statement\n",
" first_time = True\n",
" # don't take this node again\n",
" # we are going to simply delete the neighbors, there is only one dircet neighbor\n",
" # but we also need to check if the next node is balanced\n",
" n['compacted'] = True # mark this node as compacted, so it won't come up again\n",
" n['sum'] = 0 # compacted nodes store their coverage themselves, sum of coverages\n",
" n['dist'] = 0 # distance forward to next \"junction\" ahead\n",
" \n",
" ## control id, keeping the originally assigned ids in the nodes \n",
" cid = n[\"id\"]\n",
" \n",
" ## This is the actual compaction for node n \n",
" while True: \n",
" n = self.vs.select(id = cid)[0] \n",
" ## when deletng nodes, weird things kan happen, so make sure we got the same node as before\n",
" assert n, \"node must stay the same under one iteration\" \n",
" ## exit condition: we don't have exactly 1 successor, or the next node has \n",
" ## multiple incoming nodes\n",
" if (n.outdegree() != 1 or n.successors()[0].indegree() != 1):\n",
" debug ('-------------- finished with node ' + str(n))\n",
" break \n",
" \n",
" \n",
" succ = n.successors()[0] # node is linear, so we are sure\n",
" ### don't follow only self-reference:\n",
" if n == succ: \n",
" debug('------------------ hit a self reference, finished')\n",
" break\n",
" if succ['visited']:\n",
" debug ('------------------ hit a loop here')\n",
" break\n",
" if n in n.successors() or n in succ.successors():\n",
" debug('------------------- we hit a loop, finished')\n",
" break\n",
" \n",
" ## Only the very first node in our path needs to keep the full k-mer, that is a node with in_degree 0\n",
" ## All other need to be clipped now\n",
" if n.indegree() > 0 and first_time:\n",
" # otherwise we would artificially prolong the sequence\n",
" # indegree 0 nodes always keep their sequence\n",
" n['sequence'] = n['sequence'][self.k-2:]\n",
" first_time = False\n",
" else: \n",
" first_time = False # but only once, otherwise we'd chop off the whole sequence\n",
" \n",
" debug(\"really compacting \" + n['sequence'] + ' --> ' + succ['sequence'] + str(n))\n",
" n['visited'] = True\n",
" eid = self.get_eid(n.index, succ.index) # geet the edge id\n",
" ## add edges to all succ.successors to this node\n",
" for s in succ.successors():\n",
" self.add_edges([(n.index,s.index)]) ## add a new edge, \n",
" new_eid = self.get_eid(n.index, s.index) ## and get the index of the edge we just made\n",
" ## we need to copy the coverage attribute over, \n",
" ## so get the edge connecting \n",
" ## successor to its next successor\n",
" succ_eid = self.get_eid(succ.index, s.index)\n",
" self.es[new_eid]['coverage'] = self.es[succ_eid]['coverage'] # copy coverage \n",
" ## get the successors single character and add to node sequence\n",
" if succ[\"compacted\"]:\n",
" ## add the sequence of the compacted node, it is already compacted\n",
" n[\"sequence\"] += succ[\"sequence\"]\n",
" n[\"sum\"] += succ[\"sum\"]\n",
" n['dist'] += succ['dist'] \n",
" else:\n",
" n[\"sequence\"] += succ[\"sequence\"][-1:]\n",
" if n['sum'] == None:\n",
" error ('this should never happen: ' + str(n))\n",
" assert self.es[eid]['coverage'], \"Every edge should have a coverage attribute\"\n",
" assert n['sum'] != None, \"we set the coverage attribute already\"\n",
" n['sum'] += self.es[eid]['coverage'] # here the coverage is in the edge\n",
" n['dist'] += 1 # now we have made only a single step \n",
" n[\"name\"] = n[\"sequence\"] \n",
" n['label'] = '['+ str(n.index) +'] '+ n['sequence'] \n",
" ## now delete the successor and the edge\n",
" eid = self.get_eid(n.index, succ.index)\n",
" self.delete_edges(eid)\n",
" debug (\"deleting: \" + succ['name'])\n",
" self.delete_vertices(succ.index)\n",
" \n",
" ## Now the compaction is complete, but we still have sum and dist per node, but not coverage, so we need to update\n",
" ## the compacted nodes in a last sweep over all compacted nodes\n",
" for n in self.vs: n['visited'] = False \n",
" for n in self.vs.select(compacted = True):\n",
" if (n['dist'] == 0): \n",
" n['coverage'] = 1\n",
" n['compacted'] = False ## these nodes were not really compacted, there were just marked \n",
" else:\n",
" n['coverage'] = n['sum']/n['dist']\n",
" \n",
" \n",
" \n",
" def viz(self):\n",
" g = self\n",
" layout = g.layout(\"kk\")\n",
" color_dict = {True: \"lightgreen\", False: \"tomato\"}\n",
" colors = [color_dict[c] for c in g.vs[\"compacted\"]]\n",
" shapes = []\n",
" visual_style = {}\n",
" visual_style[\"edge_width\"] = [0.5 * int(coverage) for coverage in g.es[\"coverage\"]]\n",
" visual_style[\"vertex_size\"] = [20 * int(coverage) for coverage in g.vs[\"coverage\"]]\n",
" for s in g.vs.indegree():\n",
" if s == 0:\n",
" shapes.append(\"rectangle\")\n",
" else:\n",
" shapes.append(\"circle\")\n",
" g.vs['label'] = list(map(lambda x: str(x.index)+':'+x['sequence'], g.vs)) \n",
" return(ig.plot(g, bbox=(0, 0, 400, 400), vertex_color=colors, \n",
" vertex_shape=shapes, vertex_label_dist = -0.5, layout=layout, **visual_style ))\n",
"\n",
" \n",
"\n",
" \n",
" \n",
" ## utility function to get the best node, strategy:\n",
" ## ignore self reference\n",
" ## 0. if no further node: None\n",
" ## 1. if outdegree == 1, get the node\n",
" ## 2. if outdegree > 0, prioritize:\n",
" ## 1) get not visited node with highest coverage\n",
" ## the user is responsible to mark node as visited if it is used\n",
" ## we don't want to hit on an infinite loop, but if a visited node has an unvisited next node\n",
" ## it could still be part of an Eulerian walk\n",
" \n",
" \"\"\"\n",
" TODO: This function has much room for improvement. It may break up a perfectly Eulerian\n",
" walk through the graph. Find an example of such a Eulerian graph topology. How could this be\n",
" improved?\n",
" \"\"\"\n",
" def get_next_node(self, n):\n",
" \n",
" if (n.outdegree() < 1): return None\n",
" if (n.outdegree() == 1):\n",
" nn = n.successors()[0] ## there is only one, return it, but only if it is not self referential\n",
" if n == nn: # or nn['visited']: # self reference or already visited\n",
" ### this is sub-optimal, we cannot solve graph named \"double loop\" below\n",
" ### like this properly, at least it protects against infinite loop\n",
" ### in order to resolve double loops and alternative paths, we have to return \n",
" ### nn in case it has an edge that we haven't visited yet\n",
" return None\n",
" if not all(list(map(lambda y: y['visited'] or nn == y, nn.successors()))):\n",
" # checks, if of successor's successors has not been visited yet\n",
" # slightly better, but still not optimal, one would want to define a \n",
" # coverage threshold, at this point. However, such threshold should be\n",
" # adaptive, depending on global or local coverage\n",
" # Idea: calculate local coverage at the environment,\n",
" # to use a node twice, each edge to traverse should have at least similar (e.g. 1/2?) coverage\n",
" # compared to the local environment\n",
" return nn\n",
" else: \n",
" return None\n",
" \n",
" nset = n.successors()\n",
" tmp = list(filter (lambda x: not x['visited'], nset))\n",
" if len(tmp) > 0: \n",
" nn = sorted(tmp, key=lambda node: (n.outdegree(), node['coverage'], self.edge(n, node)['coverage'], len(node['sequence'] )), reverse=True)[0] \n",
" if n == nn: # self reference\n",
" return None\n",
" else:\n",
" return nn\n",
" else: \n",
" tmp = nset \n",
" tmp = list(filter(lambda x: (not all(list(map(lambda y: y['visited'] or x == y, x.successors())))), tmp))\n",
" \n",
" if len(tmp) < 1: \n",
" return None\n",
" else:\n",
" debug ('has unvisited next: ' + str(tmp)) \n",
" tmp = sorted(tmp, key=lambda node: (node['coverage'], self.edge(n, node)['coverage'], len(node['sequence'] )), reverse=True) \n",
" \n",
" nn = tmp[0]\n",
" if n == nn: # self reference\n",
" return None\n",
" else:\n",
" return nn\n",
" \n",
" \"\"\"\n",
" make a walk through the graph, given the start node\n",
" \"\"\"\n",
" def walk(self, n):\n",
" sequence = n['sequence']\n",
" n['visited'] = True\n",
" n1 = n\n",
" debug('walk: ' + n1['name'] + n1['sequence'])\n",
" while self.get_next_node(n1):\n",
" \n",
" n1 = self.get_next_node(n1)\n",
" debug('walk: ' + n1['name'])\n",
" n1['visited'] = True\n",
" if n1['compacted']:\n",
" sequence += n1['sequence']\n",
" debug ('seq: '+n1['sequence'])\n",
" else:\n",
" sequence += n1['sequence'][self.k-2]\n",
" debug ('seq: '+n1['sequence'][self.k-2])\n",
" info ('Done') \n",
" return sequence \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" ### The gredy assembly is the simplest possible heuristics and works on the compacted graph\n",
" ### With greedy bubble correction and at every junction, follow the path of highest coverage\n",
" ### No variants will be detected this way\n",
" ### Dangling short ends will be cut off if they are shorter than min_contig_len, and there is another \n",
" ### path > 2*k ahead, even if they have higher coverage \n",
" ### If a repeat (a node that has already been visited) is hit with distance < min_contig_len, the contig will be dropped \n",
" # - get all start nodes, easy ones first until all nodes have been visited\n",
" def assemble(self):\n",
" \n",
" contigs = []\n",
" min_contig_len = self.k ## very lenient, could be increased but ok for toy examples\n",
" \n",
" # get optimal start nodes first: optimal start nodes are those with indegree 0 and not visited\n",
" counter = 0\n",
" nset0 = self.vs.select( visited = False, _indegree = 0 )\n",
" \n",
" while counter <= len(nset0): # protect against infinite loop\n",
" nset = self.vs.select(visited = False, _indegree = 0)\n",
" # we could also sort the nodeset to refine our strategy further but we don't need that now\n",
" if len(nset) < 1: break\n",
" contig = self.walk(nset[0])\n",
" if len(contig) >= min_contig_len:\n",
" contigs.append(contig)\n",
" \n",
" # do the same for remaining other unvisited nodes\n",
" nset0 = self.vs.select( visited = False)\n",
" \n",
" while counter <= len(nset0): # protect against infinite loop\n",
" \n",
" nset = self.vs.select(visited = False)\n",
" # we could also sort the nodeset to refine our strategy further\n",
" # as we might end up being somewhere in a loop, we could as well start with\n",
" # the longest node\n",
" nset = sorted(nset, key = lambda x: len(x['sequence']), reverse = True)\n",
" if len(nset) < 1: break\n",
" contig = self.walk(nset[0])\n",
" if len(contig) >= min_contig_len:\n",
" contigs.append(contig)\n",
" \n",
" self.contigs = contigs\n",
" return contigs\n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \"\"\"\n",
" utility funciton to calculate the coverage per graph\n",
" assume there is a real path between nodes\n",
" \"\"\"\n",
" def get_path_coverage(self, path):\n",
" pathsum = 0\n",
" pathlen = 0\n",
" for i in range(len(path)):\n",
" n = self.vs[path[i]]\n",
" if (n['compacted']):\n",
" pathsum += n['sum']\n",
" pathlen += n['dist']\n",
" else:\n",
" if i < len(path)-1:\n",
" pathsum += self.edge(self.vs[path[i]], self.vs[path[i+1]])['coverage']\n",
" pathlen += 1\n",
" \n",
" if (pathlen > 0): \n",
" return pathsum/pathlen\n",
" else:\n",
" return 0 \n",
" \n",
" ## simple bubble removal strategy:\n",
" ## a bubble starts at a node with outdegree > 1\n",
" ## it ends if the paths join again\n",
" ## if the paths join at all\n",
" ## if there are alternative paths to the confluent node, AND all nodes in a path have outdegree == 1\n",
" ## then we have a real bubble: remove the nodes in the path(s) with lowest coverage\n",
" ## however, we ignore the case for now that there could be divergent paths emerging from a lower \n",
" ## coverage bubble. \n",
" def remove_bubbles(self):\n",
" max_look_ahead = 100 # maximum number of nodes to look ahead for a junction\n",
" # first check if there is a bubble at all\n",
" # we heavily use graph and set operations here\n",
" nset0 = self.vs.select(_outdegree_gt = 1) # the maximum number of nodes to visit \n",
" counter = 0 \n",
" while (counter < len(nset0)): # max number of iterations possible because we only delete nodes\n",
" counter = counter + 1\n",
" ## we have to update our nodes each time, because nodes could have been deleted already\n",
" nset = self.vs.select(_outdegree_gt = 1)\n",
" if len (nset) < 1: break\n",
" n = nset[0]\n",
" nhoods = self.neighborhood(n.successors(), order=max_look_ahead, mode=\"out\") ## get nodeset in neighborhoods\n",
" for i in range(len(nhoods)):\n",
" for j in range(i+1, len(nhoods)):\n",
" ## if the intersections are non-empty, there is a confluent node\n",
" confluent_node = isect(nhoods[i],nhoods[j])\n",
" if (confluent_node):\n",
" info (str(counter) + ':' +' bubble detected:' + n['name'] +' ---> ' + self.vs[confluent_node]['name'])\n",
" paths = list(map(lambda x: self.get_shortest_paths(x,confluent_node)[0], n.successors()))\n",
" paths = sorted(paths, key= lambda x: (g.get_path_coverage(x),len(x))) # order paths by coverage, then by length\n",
" debug(str(paths))\n",
" keep_this = paths.pop()\n",
" for path in paths:\n",
" if n.index in path: continue ## this is a loop, we cannot deal with this case yet\n",
" del_me = set(path) - set(keep_this) # get a set with shared nodes removed \n",
" # this guarantees also that we do not accidentially delete nodes that are still needed\n",
" # one could decide to only delete linear nodes\n",
" del_me = list(filter(lambda x: self.vs[x].outdegree() == 1 , del_me))\n",
" ## if the graph was compacted already this would delete only compacted nodes\n",
" # del_me = list(filter(lambda x: self.vs[x].indegree() == 1 , del_me))\n",
" \n",
" info (\"deleting the following nodes: \"+ str(del_me))\n",
" self.delete_vertices(del_me) # that will also clean the edges\n",
" \n",
" \n",
" \n",
" \n",
"def isect(lst1, lst2):\n",
" lst3 = [value for value in lst1 if value in lst2]\n",
" if len(lst3) < 1: \n",
" return None\n",
" else:\n",
" return lst3[0]\n",
" \n",
" \n",
"class Node:\n",
" id = 0\n",
" graph = None\n",
" inode = None\n",
" def __init__(self, graph, id=None, sequence=None):\n",
" self.graph = graph\n",
" if sequence:\n",
" self.inode = graph.vs.find(sequence=sequence)\n",
" else: \n",
" if id != None:\n",
" self.inode = graph.vs[id] \n",
" self.id = self.inode['id']\n",
" \n",
" def indegree(self):\n",
" return self.inode.indegree()\n",
" \n",
" def outdegree(self):\n",
" return self.inode.outdegree()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cb64c924-3fca-402c-96d8-388d2dfa01d5",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:adding edge0 --> 1\n"
]
}
],
"source": [
"g = DBGraph('hey joe')\n",
"\n",
"#g.add_kmer('tttt')\n",
"## bubble graph\n",
"g.add('abb','bbb')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "019c49d1-127b-4d80-9e94-8ff612f070ad",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:adding edge0 --> 1\n",
"DEBUG:root:adding edge1 --> 2\n",
"DEBUG:root:adding edge2 --> 3\n",
"DEBUG:root:adding edge3 --> 4\n",
"DEBUG:root:adding edge3 --> 5\n",
"DEBUG:root:adding edge5 --> 6\n",
"DEBUG:root:adding edge6 --> 7\n",
"DEBUG:root:adding edge6 --> 8\n",
"DEBUG:root:adding edge8 --> 9\n",
"DEBUG:root:adding edge9 --> 10\n",
"DEBUG:root:adding edge4 --> 10\n",
"DEBUG:root:adding edge1 --> 11\n",
"DEBUG:root:adding edge1 --> 11\n",
"DEBUG:root:adding edge1 --> 11\n",
"DEBUG:root:adding edge11 --> 12\n",
"DEBUG:root:adding edge11 --> 12\n",
"DEBUG:root:adding edge12 --> 13\n",
"DEBUG:root:adding edge12 --> 13\n",
"DEBUG:root:adding edge13 --> 10\n",
"DEBUG:root:adding edge10 --> 14\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 3,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
"g = DBGraph('hey joe')\n",
"\n",
"#g.add_kmer('tttt')\n",
"## bubble graph\n",
"g.add('abb','bbb')\n",
"g.add('bbb','bba')\n",
"g.add('bba','bas')\n",
"g.add('bas','asu')\n",
"g.add('bas','ast')\n",
"g.add('ast','stu')\n",
"g.add('stu','tup')\n",
"g.add('stu','tus')\n",
"g.add('tus','usu')\n",
"g.add('usu','suu')\n",
"\n",
"g.add('asu','suu')\n",
"g.add('bbb','bbA')\n",
"g.add('bbb','bbA')\n",
"g.add('bbb','bbA')\n",
"g.add('bbA','bAs')\n",
"g.add('bbA','bAs')\n",
"g.add('bAs','Asu')\n",
"g.add('bAs','Asu')\n",
"g.add('Asu','suu')\n",
"g.add('suu','uuu')\n",
"\n",
"## alternative start and end nodes\n",
"## g.add(\"NAT\",\"ATG\")\n",
"# g.add(\"NAT\",\"ATG\")\n",
"# g.add(\"NAT\",\"ATG\")\n",
"# g.add(\"XAT\",\"ATG\")\n",
"# g.add(\"ATG\", \"TGA\")\n",
"# g.add(\"ATG\", \"TGA\")\n",
"# g.add(\"ATG\", \"TGA\")\n",
"# g.add(\"ATG\", \"TGA\")\n",
"# g.add(\"ATG\", \"TGA\")\n",
"\n",
"g.viz()\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "803e878d-7fb7-49db-a1e5-0040c37363a3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:0: len(nset0)10\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 0, {'name': 'abb', 'id': 0, 'sequence': 'abb', 'label': '0:abb', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting abb --> bbbigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 0, {'name': 'abb', 'id': 0, 'sequence': 'abb', 'label': '0:abb', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: bbb\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 0, {'name': 'abbb', 'id': 0, 'sequence': 'abbb', 'label': '[0] abbb', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:1: len(nset0)9\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 1, {'name': 'bba', 'id': 2, 'sequence': 'bba', 'label': '2:bba', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting a --> basigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 1, {'name': 'bba', 'id': 2, 'sequence': 'a', 'label': '2:bba', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: bas\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 1, {'name': 'as', 'id': 2, 'sequence': 'as', 'label': '[1] as', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:2: len(nset0)8\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 2, {'name': 'asu', 'id': 4, 'sequence': 'asu', 'label': '4:asu', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 2, {'name': 'asu', 'id': 4, 'sequence': 'asu', 'label': '4:asu', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:3: len(nset0)7\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 3, {'name': 'ast', 'id': 5, 'sequence': 'ast', 'label': '5:ast', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting t --> stuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 3, {'name': 'ast', 'id': 5, 'sequence': 't', 'label': '5:ast', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: stu\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 3, {'name': 'tu', 'id': 5, 'sequence': 'tu', 'label': '[3] tu', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:4: len(nset0)6\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 5, {'name': 'tus', 'id': 8, 'sequence': 'tus', 'label': '8:tus', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting s --> usuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 5, {'name': 'tus', 'id': 8, 'sequence': 's', 'label': '8:tus', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: usu\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 5, {'name': 'su', 'id': 8, 'sequence': 'su', 'label': '[5] su', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:5: len(nset0)4\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 6, {'name': 'suu', 'id': 10, 'sequence': 'suu', 'label': '10:suu', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting u --> uuuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 6, {'name': 'suu', 'id': 10, 'sequence': 'u', 'label': '10:suu', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: uuu\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 6, {'name': 'uu', 'id': 10, 'sequence': 'uu', 'label': '[6] uu', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:6: len(nset0)3\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'bbA', 'id': 11, 'sequence': 'bbA', 'label': '11:bbA', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting A --> bAsigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'bbA', 'id': 11, 'sequence': 'A', 'label': '11:bbA', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: bAs\n",
"DEBUG:root:really compacting As --> Asuigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'As', 'id': 11, 'sequence': 'As', 'label': '[7] As', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 1})\n",
"DEBUG:root:deleting: Asu\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38450>, 7, {'name': 'Asu', 'id': 11, 'sequence': 'Asu', 'label': '[7] Asu', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 2})\n",
"DEBUG:root:7: len(nset0)0\n",
"INFO:root:no more compactable stretches\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 4,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
"g.compact()\n",
"g.viz()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "bebc8e6e-0972-4cae-aab2-ff2f655ba20d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:adding edge0 --> 1\n",
"DEBUG:root:adding edge1 --> 2\n",
"DEBUG:root:adding edge2 --> 0\n",
"DEBUG:root:0: len(nset0)3\n",
"DEBUG:root:seeking leftPZA\n",
"DEBUG:root:seeking leftAPZ\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38250>, 1, {'name': 'APZ', 'id': 1, 'sequence': 'APZ', 'label': '1:APZ', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting Z --> PZAigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38250>, 1, {'name': 'APZ', 'id': 1, 'sequence': 'Z', 'label': '1:APZ', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: PZA\n",
"DEBUG:root:------------------- we hit a loop, finished\n",
"DEBUG:root:1: len(nset0)1\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38250>, 0, {'name': 'ZAP', 'id': 0, 'sequence': 'ZAP', 'label': '0:ZAP', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:------------------- we hit a loop, finished\n",
"DEBUG:root:2: len(nset0)0\n",
"INFO:root:no more compactable stretches\n",
"DEBUG:root:walk: ZAPZAP\n",
"DEBUG:root:walk: ZA\n",
"DEBUG:root:seq: ZA\n",
"INFO:root:Done\n"
]
},
{
"data": {
"text/plain": [
"['ZAPZA']"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g = DBGraph(\"cycle\")\n",
"# cyclic\n",
"g.add(\"ZAP\",\"APZ\")\n",
"g.add(\"APZ\",\"PZA\")\n",
"g.add(\"PZA\",\"ZAP\")\n",
"## this must not give into infinite loop\n",
"g.compact()\n",
"g.assemble() # output: ['ZAPZA']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "9e0f85bb-8bf4-4a0d-bf1e-799fc229db92",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:adding edge0 --> 1\n",
"DEBUG:root:adding edge1 --> 2\n",
"DEBUG:root:adding edge2 --> 3\n",
"DEBUG:root:adding edge3 --> 4\n",
"DEBUG:root:adding edge4 --> 5\n",
"DEBUG:root:adding edge5 --> 6\n",
"DEBUG:root:adding edge6 --> 7\n",
"DEBUG:root:adding edge7 --> 8\n",
"DEBUG:root:adding edge8 --> 9\n",
"DEBUG:root:adding edge9 --> 10\n",
"DEBUG:root:adding edge10 --> 11\n",
"DEBUG:root:adding edge12 --> 13\n",
"DEBUG:root:adding edge13 --> 14\n",
"DEBUG:root:adding edge14 --> 3\n",
"DEBUG:root:adding edge3 --> 4\n",
"DEBUG:root:adding edge4 --> 5\n",
"DEBUG:root:adding edge5 --> 6\n",
"DEBUG:root:adding edge6 --> 7\n",
"DEBUG:root:adding edge7 --> 8\n",
"DEBUG:root:adding edge8 --> 9\n",
"DEBUG:root:adding edge9 --> 10\n",
"DEBUG:root:adding edge10 --> 11\n",
"DEBUG:root:adding edge15 --> 16\n",
"DEBUG:root:adding edge16 --> 17\n",
"DEBUG:root:adding edge17 --> 18\n",
"DEBUG:root:adding edge18 --> 19\n",
"DEBUG:root:adding edge19 --> 20\n",
"DEBUG:root:adding edge20 --> 21\n",
"DEBUG:root:adding edge21 --> 22\n",
"DEBUG:root:adding edge22 --> 23\n",
"DEBUG:root:adding edge23 --> 24\n",
"DEBUG:root:adding edge24 --> 25\n",
"DEBUG:root:adding edge25 --> 26\n",
"DEBUG:root:adding edge26 --> 27\n",
"DEBUG:root:adding edge27 --> 28\n",
"DEBUG:root:adding edge28 --> 29\n",
"DEBUG:root:adding edge29 --> 30\n",
"DEBUG:root:adding edge30 --> 31\n",
"DEBUG:root:adding edge31 --> 32\n",
"DEBUG:root:adding edge32 --> 33\n",
"DEBUG:root:adding edge33 --> 34\n",
"DEBUG:root:adding edge34 --> 35\n",
"DEBUG:root:adding edge35 --> 36\n",
"DEBUG:root:adding edge36 --> 37\n",
"DEBUG:root:adding edge37 --> 38\n",
"DEBUG:root:adding edge38 --> 39\n",
"DEBUG:root:adding edge39 --> 40\n",
"DEBUG:root:adding edge40 --> 41\n",
"DEBUG:root:adding edge41 --> 42\n",
"DEBUG:root:adding edge42 --> 43\n",
"DEBUG:root:adding edge43 --> 44\n",
"DEBUG:root:adding edge44 --> 45\n",
"DEBUG:root:adding edge45 --> 15\n",
"DEBUG:root:adding edge15 --> 16\n",
"DEBUG:root:adding edge16 --> 46\n",
"DEBUG:root:adding edge46 --> 47\n",
"DEBUG:root:adding edge47 --> 48\n",
"DEBUG:root:adding edge48 --> 49\n",
"DEBUG:root:adding edge49 --> 50\n",
"DEBUG:root:adding edge50 --> 51\n",
"DEBUG:root:adding edge51 --> 52\n",
"DEBUG:root:adding edge52 --> 53\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 6,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
"# test construction from file\n",
"g = DBGraph('file')\n",
"g.add_from_file('test.fasta')\n",
"g.viz()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f7532d51-e4f3-45e7-a6fd-657a372d99f0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:0: len(nset0)51\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abc'), 'id': 0, 'sequence': Seq('abc'), 'label': Seq('0:abc'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting abc --> bcdigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abc'), 'id': 0, 'sequence': Seq('abc'), 'label': Seq('0:abc'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: bcd\n",
"DEBUG:root:really compacting abcd --> cdeigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abcd'), 'id': 0, 'sequence': Seq('abcd'), 'label': Seq('[0] abcd'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:deleting: cde\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 0, {'name': Seq('abcde'), 'id': 0, 'sequence': Seq('abcde'), 'label': Seq('[0] abcde'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n",
"DEBUG:root:1: len(nset0)48\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('def'), 'id': 3, 'sequence': Seq('def'), 'label': Seq('3:def'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting f --> efgigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('def'), 'id': 3, 'sequence': Seq('f'), 'label': Seq('3:def'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: efg\n",
"DEBUG:root:really compacting fg --> fghigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fg'), 'id': 3, 'sequence': Seq('fg'), 'label': Seq('[1] fg'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 1})\n",
"DEBUG:root:deleting: fgh\n",
"DEBUG:root:really compacting fgh --> ghiigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fgh'), 'id': 3, 'sequence': Seq('fgh'), 'label': Seq('[1] fgh'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 2})\n",
"DEBUG:root:deleting: ghi\n",
"DEBUG:root:really compacting fghi --> hijigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghi'), 'id': 3, 'sequence': Seq('fghi'), 'label': Seq('[1] fghi'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 6, 'dist': 3})\n",
"DEBUG:root:deleting: hij\n",
"DEBUG:root:really compacting fghij --> ijkigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghij'), 'id': 3, 'sequence': Seq('fghij'), 'label': Seq('[1] fghij'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 8, 'dist': 4})\n",
"DEBUG:root:deleting: ijk\n",
"DEBUG:root:really compacting fghijk --> jkligraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijk'), 'id': 3, 'sequence': Seq('fghijk'), 'label': Seq('[1] fghijk'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 10, 'dist': 5})\n",
"DEBUG:root:deleting: jkl\n",
"DEBUG:root:really compacting fghijkl --> klmigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijkl'), 'id': 3, 'sequence': Seq('fghijkl'), 'label': Seq('[1] fghijkl'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 12, 'dist': 6})\n",
"DEBUG:root:deleting: klm\n",
"DEBUG:root:really compacting fghijklm --> lmnigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijklm'), 'id': 3, 'sequence': Seq('fghijklm'), 'label': Seq('[1] fghijklm'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 14, 'dist': 7})\n",
"DEBUG:root:deleting: lmn\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 1, {'name': Seq('fghijklmn'), 'id': 3, 'sequence': Seq('fghijklmn'), 'label': Seq('[1] fghijklmn'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 16, 'dist': 8})\n",
"DEBUG:root:2: len(nset0)40\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abd'), 'id': 12, 'sequence': Seq('abd'), 'label': Seq('12:abd'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting abd --> bddigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abd'), 'id': 12, 'sequence': Seq('abd'), 'label': Seq('12:abd'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: bdd\n",
"DEBUG:root:really compacting abdd --> ddeigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abdd'), 'id': 12, 'sequence': Seq('abdd'), 'label': Seq('[2] abdd'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:deleting: dde\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 2, {'name': Seq('abdde'), 'id': 12, 'sequence': Seq('abdde'), 'label': Seq('[2] abdde'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n",
"DEBUG:root:3: len(nset0)37\n",
"DEBUG:root:seeking left_th\n",
"DEBUG:root:seeking leftr_t\n",
"DEBUG:root:seeking lefter_\n",
"DEBUG:root:seeking leftver\n",
"DEBUG:root:seeking leftove\n",
"DEBUG:root:seeking left_ov\n",
"DEBUG:root:seeking lefts_o\n",
"DEBUG:root:seeking leftps_\n",
"DEBUG:root:seeking leftmps\n",
"DEBUG:root:seeking leftump\n",
"DEBUG:root:seeking leftjum\n",
"DEBUG:root:seeking left_ju\n",
"DEBUG:root:seeking leftx_j\n",
"DEBUG:root:seeking leftox_\n",
"DEBUG:root:seeking leftfox\n",
"DEBUG:root:seeking left_fo\n",
"DEBUG:root:seeking leftn_f\n",
"DEBUG:root:seeking leftwn_\n",
"DEBUG:root:seeking leftown\n",
"DEBUG:root:seeking leftrow\n",
"DEBUG:root:seeking leftbro\n",
"DEBUG:root:seeking left_br\n",
"DEBUG:root:seeking leftk_b\n",
"DEBUG:root:seeking leftck_\n",
"DEBUG:root:seeking leftick\n",
"DEBUG:root:seeking leftuic\n",
"DEBUG:root:seeking leftqui\n",
"DEBUG:root:seeking left_qu\n",
"DEBUG:root:seeking lefte_q\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_q'), 'id': 17, 'sequence': Seq('e_q'), 'label': Seq('17:e_q'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting q --> _quigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_q'), 'id': 17, 'sequence': Seq('q'), 'label': Seq('17:e_q'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: _qu\n",
"DEBUG:root:really compacting qu --> quiigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('qu'), 'id': 17, 'sequence': Seq('qu'), 'label': Seq('[5] qu'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:deleting: qui\n",
"DEBUG:root:really compacting qui --> uicigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('qui'), 'id': 17, 'sequence': Seq('qui'), 'label': Seq('[5] qui'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n",
"DEBUG:root:deleting: uic\n",
"DEBUG:root:really compacting quic --> ickigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quic'), 'id': 17, 'sequence': Seq('quic'), 'label': Seq('[5] quic'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 3})\n",
"DEBUG:root:deleting: ick\n",
"DEBUG:root:really compacting quick --> ck_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick'), 'id': 17, 'sequence': Seq('quick'), 'label': Seq('[5] quick'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 4})\n",
"DEBUG:root:deleting: ck_\n",
"DEBUG:root:really compacting quick_ --> k_bigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_'), 'id': 17, 'sequence': Seq('quick_'), 'label': Seq('[5] quick_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 5, 'dist': 5})\n",
"DEBUG:root:deleting: k_b\n",
"DEBUG:root:really compacting quick_b --> _brigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_b'), 'id': 17, 'sequence': Seq('quick_b'), 'label': Seq('[5] quick_b'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 6, 'dist': 6})\n",
"DEBUG:root:deleting: _br\n",
"DEBUG:root:really compacting quick_br --> broigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_br'), 'id': 17, 'sequence': Seq('quick_br'), 'label': Seq('[5] quick_br'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 7, 'dist': 7})\n",
"DEBUG:root:deleting: bro\n",
"DEBUG:root:really compacting quick_bro --> rowigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_bro'), 'id': 17, 'sequence': Seq('quick_bro'), 'label': Seq('[5] quick_bro'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 8, 'dist': 8})\n",
"DEBUG:root:deleting: row\n",
"DEBUG:root:really compacting quick_brow --> ownigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brow'), 'id': 17, 'sequence': Seq('quick_brow'), 'label': Seq('[5] quick_brow'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 9, 'dist': 9})\n",
"DEBUG:root:deleting: own\n",
"DEBUG:root:really compacting quick_brown --> wn_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown'), 'id': 17, 'sequence': Seq('quick_brown'), 'label': Seq('[5] quick_brown'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 10, 'dist': 10})\n",
"DEBUG:root:deleting: wn_\n",
"DEBUG:root:really compacting quick_brown_ --> n_figraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_'), 'id': 17, 'sequence': Seq('quick_brown_'), 'label': Seq('[5] quick_brown_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 11, 'dist': 11})\n",
"DEBUG:root:deleting: n_f\n",
"DEBUG:root:really compacting quick_brown_f --> _foigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_f'), 'id': 17, 'sequence': Seq('quick_brown_f'), 'label': Seq('[5] quick_brown_f'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 12, 'dist': 12})\n",
"DEBUG:root:deleting: _fo\n",
"DEBUG:root:really compacting quick_brown_fo --> foxigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fo'), 'id': 17, 'sequence': Seq('quick_brown_fo'), 'label': Seq('[5] quick_brown_fo'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 13, 'dist': 13})\n",
"DEBUG:root:deleting: fox\n",
"DEBUG:root:really compacting quick_brown_fox --> ox_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox'), 'id': 17, 'sequence': Seq('quick_brown_fox'), 'label': Seq('[5] quick_brown_fox'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 14, 'dist': 14})\n",
"DEBUG:root:deleting: ox_\n",
"DEBUG:root:really compacting quick_brown_fox_ --> x_jigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_'), 'id': 17, 'sequence': Seq('quick_brown_fox_'), 'label': Seq('[5] quick_brown_fox_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 15, 'dist': 15})\n",
"DEBUG:root:deleting: x_j\n",
"DEBUG:root:really compacting quick_brown_fox_j --> _juigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_j'), 'id': 17, 'sequence': Seq('quick_brown_fox_j'), 'label': Seq('[5] quick_brown_fox_j'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 16, 'dist': 16})\n",
"DEBUG:root:deleting: _ju\n",
"DEBUG:root:really compacting quick_brown_fox_ju --> jumigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_ju'), 'id': 17, 'sequence': Seq('quick_brown_fox_ju'), 'label': Seq('[5] quick_brown_fox_ju'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 17, 'dist': 17})\n",
"DEBUG:root:deleting: jum\n",
"DEBUG:root:really compacting quick_brown_fox_jum --> umpigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jum'), 'id': 17, 'sequence': Seq('quick_brown_fox_jum'), 'label': Seq('[5] quick_brown_fox_jum'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 18, 'dist': 18})\n",
"DEBUG:root:deleting: ump\n",
"DEBUG:root:really compacting quick_brown_fox_jump --> mpsigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jump'), 'id': 17, 'sequence': Seq('quick_brown_fox_jump'), 'label': Seq('[5] quick_brown_fox_jump'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 19, 'dist': 19})\n",
"DEBUG:root:deleting: mps\n",
"DEBUG:root:really compacting quick_brown_fox_jumps --> ps_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps'), 'label': Seq('[5] quick_brown_fox_jumps'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 20, 'dist': 20})\n",
"DEBUG:root:deleting: ps_\n",
"DEBUG:root:really compacting quick_brown_fox_jumps_ --> s_oigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_'), 'label': Seq('[5] quick_brown_fox_jumps_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 21, 'dist': 21})\n",
"DEBUG:root:deleting: s_o\n",
"DEBUG:root:really compacting quick_brown_fox_jumps_o --> _ovigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_o'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_o'), 'label': Seq('[5] quick_brown_fox_jumps_o'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 22, 'dist': 22})\n",
"DEBUG:root:deleting: _ov\n",
"DEBUG:root:really compacting quick_brown_fox_jumps_ov --> oveigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_ov'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_ov'), 'label': Seq('[5] quick_brown_fox_jumps_ov'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 23, 'dist': 23})\n",
"DEBUG:root:deleting: ove\n",
"DEBUG:root:really compacting quick_brown_fox_jumps_ove --> verigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_ove'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_ove'), 'label': Seq('[5] quick_brown_fox_jumps_ove'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 24, 'dist': 24})\n",
"DEBUG:root:deleting: ver\n",
"DEBUG:root:really compacting quick_brown_fox_jumps_over --> er_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over'), 'label': Seq('[5] quick_brown_fox_jumps_over'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 25, 'dist': 25})\n",
"DEBUG:root:deleting: er_\n",
"DEBUG:root:really compacting quick_brown_fox_jumps_over_ --> r_tigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over_'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over_'), 'label': Seq('[5] quick_brown_fox_jumps_over_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 26, 'dist': 26})\n",
"DEBUG:root:deleting: r_t\n",
"DEBUG:root:really compacting quick_brown_fox_jumps_over_t --> _thigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over_t'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over_t'), 'label': Seq('[5] quick_brown_fox_jumps_over_t'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 27, 'dist': 27})\n",
"DEBUG:root:deleting: _th\n",
"DEBUG:root:really compacting quick_brown_fox_jumps_over_th --> theigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('quick_brown_fox_jumps_over_th'), 'id': 17, 'sequence': Seq('quick_brown_fox_jumps_over_th'), 'label': Seq('[5] quick_brown_fox_jumps_over_th'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 28, 'dist': 28})\n",
"DEBUG:root:deleting: the\n",
"DEBUG:root:------------------- we hit a loop, finished\n",
"DEBUG:root:4: len(nset0)7\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_l'), 'id': 46, 'sequence': Seq('e_l'), 'label': Seq('46:e_l'), 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting l --> _laigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('e_l'), 'id': 46, 'sequence': Seq('l'), 'label': Seq('46:e_l'), 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: _la\n",
"DEBUG:root:really compacting la --> lazigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('la'), 'id': 46, 'sequence': Seq('la'), 'label': Seq('[5] la'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:deleting: laz\n",
"DEBUG:root:really compacting laz --> azyigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('laz'), 'id': 46, 'sequence': Seq('laz'), 'label': Seq('[5] laz'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n",
"DEBUG:root:deleting: azy\n",
"DEBUG:root:really compacting lazy --> zy_igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy'), 'id': 46, 'sequence': Seq('lazy'), 'label': Seq('[5] lazy'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 3})\n",
"DEBUG:root:deleting: zy_\n",
"DEBUG:root:really compacting lazy_ --> y_digraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_'), 'id': 46, 'sequence': Seq('lazy_'), 'label': Seq('[5] lazy_'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 4, 'dist': 4})\n",
"DEBUG:root:deleting: y_d\n",
"DEBUG:root:really compacting lazy_d --> _doigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_d'), 'id': 46, 'sequence': Seq('lazy_d'), 'label': Seq('[5] lazy_d'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 5, 'dist': 5})\n",
"DEBUG:root:deleting: _do\n",
"DEBUG:root:really compacting lazy_do --> dogigraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_do'), 'id': 46, 'sequence': Seq('lazy_do'), 'label': Seq('[5] lazy_do'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 6, 'dist': 6})\n",
"DEBUG:root:deleting: dog\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb50ff38650>, 5, {'name': Seq('lazy_dog'), 'id': 46, 'sequence': Seq('lazy_dog'), 'label': Seq('[5] lazy_dog'), 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 7, 'dist': 7})\n",
"DEBUG:root:5: len(nset0)0\n",
"INFO:root:no more compactable stretches\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
"g.compact()\n",
"g.viz()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "3c8389d8-f81c-45d1-aea6-29817e010217",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:1: bubble detected:he_ ---> lazy_dog\n",
"DEBUG:root:[[5], [4, 3, 5]]\n",
"INFO:root:deleting the following nodes: []\n",
"DEBUG:root:walk: abcdeabcde\n",
"DEBUG:root:walk: fghijklmn\n",
"DEBUG:root:seq: fghijklmn\n",
"INFO:root:Done\n",
"DEBUG:root:walk: abddeabdde\n",
"INFO:root:Done\n",
"DEBUG:root:walk: quick_brown_fox_jumps_over_thequick_brown_fox_jumps_over_the\n",
"DEBUG:root:walk: he_\n",
"DEBUG:root:seq: _\n",
"DEBUG:root:walk: lazy_dog\n",
"DEBUG:root:seq: lazy_dog\n",
"INFO:root:Done\n"
]
},
{
"data": {
"text/plain": [
"[Seq('abcdefghijklmn'),\n",
" Seq('abdde'),\n",
" Seq('quick_brown_fox_jumps_over_the_lazy_dog')]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g.remove_bubbles()\n",
"g.assemble() ## [Seq('12345678'), Seq('abcdefghijklmnopqrstuvwXYZ')] \n",
"# output: [Seq('abcdefghijklmn'),\n",
"# Seq('abdde'),\n",
"# Seq('quick_brown_fox_jumps_over_the_lazy_dog')]\n",
"# no bubble will be deleted because it is indeed a loop"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "51090d0b-3457-46fd-ac00-f74a643e8269",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:adding edge0 --> 1\n",
"DEBUG:root:adding edge1 --> 2\n",
"DEBUG:root:adding edge1 --> 2\n",
"DEBUG:root:adding edge1 --> 2\n",
"DEBUG:root:adding edge1 --> 2\n",
"DEBUG:root:adding edge2 --> 3\n",
"DEBUG:root:adding edge2 --> 4\n",
"DEBUG:root:adding edge5 --> 6\n",
"DEBUG:root:adding edge5 --> 6\n",
"DEBUG:root:adding edge6 --> 7\n",
"DEBUG:root:adding edge8 --> 8\n",
"DEBUG:root:adding edge8 --> 8\n",
"DEBUG:root:adding edge8 --> 8\n",
"DEBUG:root:adding edge8 --> 8\n",
"DEBUG:root:adding edge8 --> 8\n",
"DEBUG:root:adding edge8 --> 9\n",
"DEBUG:root:adding edge8 --> 9\n",
"DEBUG:root:adding edge9 --> 10\n",
"DEBUG:root:adding edge10 --> 11\n",
"DEBUG:root:adding edge11 --> 8\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
" \n",
"\n",
" \n",
"g = DBGraph('test more stuff')\n",
"g.add(\"TGA\",\"GAA\")\n",
"g.add(\"GAA\",\"AAT\")\n",
"g.add(\"GAA\",\"AAT\")\n",
"g.add(\"GAA\",\"AAT\")\n",
"g.add(\"GAA\",\"AAT\")\n",
"\n",
"g.add(\"AAT\",\"ATT\")\n",
"g.add(\"AAT\",\"ATC\")\n",
"\n",
"# short linear graph\n",
"g.add(\"POP\",\"OPA\")\n",
"g.add('POP','OPA')\n",
"g.add(\"OPA\",\"PAM\")\n",
"\n",
"\n",
"\n",
"## self repeat + cyclic graph\n",
"g.add(\"XXX\",\"XXX\")\n",
"g.add(\"XXX\",\"XXX\")\n",
"g.add(\"XXX\",\"XXX\")\n",
"g.add(\"XXX\",\"XXX\")\n",
"g.add(\"XXX\",\"XXX\")\n",
"g.add(\"XXX\",\"XXY\")\n",
"g.add(\"XXX\",\"XXY\")\n",
"\n",
"g.add(\"XXY\",\"XYX\")\n",
"g.add(\"XYX\",\"YXX\")\n",
"g.add(\"YXX\",\"XXX\")\n",
"g.viz()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "3bbab911-a050-4ffa-a2c9-f0af11c4d6d4",
"metadata": {
"scrolled": true,
"tags": []
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:0: len(nset0)7\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGA', 'id': 0, 'sequence': 'TGA', 'label': '0:TGA', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting TGA --> GAAigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGA', 'id': 0, 'sequence': 'TGA', 'label': '0:TGA', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: GAA\n",
"DEBUG:root:really compacting TGAA --> AATigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGAA', 'id': 0, 'sequence': 'TGAA', 'label': '[0] TGAA', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:deleting: AAT\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 0, {'name': 'TGAAT', 'id': 0, 'sequence': 'TGAAT', 'label': '[0] TGAAT', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 5, 'dist': 2})\n",
"DEBUG:root:1: len(nset0)5\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POP', 'id': 5, 'sequence': 'POP', 'label': '5:POP', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting POP --> OPAigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POP', 'id': 5, 'sequence': 'POP', 'label': '5:POP', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: OPA\n",
"DEBUG:root:really compacting POPA --> PAMigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POPA', 'id': 5, 'sequence': 'POPA', 'label': '[3] POPA', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 1})\n",
"DEBUG:root:deleting: PAM\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 3, {'name': 'POPAM', 'id': 5, 'sequence': 'POPAM', 'label': '[3] POPAM', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 2})\n",
"DEBUG:root:2: len(nset0)3\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'XXY', 'id': 9, 'sequence': 'XXY', 'label': '9:XXY', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting Y --> XYXigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'XXY', 'id': 9, 'sequence': 'Y', 'label': '9:XXY', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: XYX\n",
"DEBUG:root:really compacting YX --> YXXigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'YX', 'id': 9, 'sequence': 'YX', 'label': '[5] YX', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:deleting: YXX\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3e50>, 5, {'name': 'YXX', 'id': 9, 'sequence': 'YXX', 'label': '[5] YXX', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n",
"DEBUG:root:3: len(nset0)0\n",
"INFO:root:no more compactable stretches\n",
"DEBUG:root:walk: TGAATTGAAT\n",
"DEBUG:root:walk: ATT\n",
"DEBUG:root:seq: T\n",
"INFO:root:Done\n",
"DEBUG:root:walk: POPAMPOPAM\n",
"INFO:root:Done\n",
"DEBUG:root:walk: ATCATC\n",
"INFO:root:Done\n",
"DEBUG:root:walk: XXXXXX\n",
"DEBUG:root:walk: YXX\n",
"DEBUG:root:seq: YXX\n",
"INFO:root:Done\n"
]
},
{
"data": {
"text/plain": [
"['TGAATT', 'POPAM', 'XXXYXX']"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g.compact()\n",
"g.assemble() # should give: ['TGAATT', 'POPAM', 'XXXYXX']\n",
" \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "14edba89-d3d9-4376-b704-b95e4b397992",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:adding edge0 --> 1\n",
"DEBUG:root:adding edge1 --> 2\n",
"DEBUG:root:adding edge2 --> 3\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 11,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
"g = DBGraph('hey joe')\n",
"g.add('123','234')\n",
"g.add('234','345')\n",
"g.add('345','456')\n",
"g.viz()\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "435eab49-0949-4a85-9def-902e46f6209d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:walk: 123123\n",
"DEBUG:root:walk: 234\n",
"DEBUG:root:seq: 4\n",
"DEBUG:root:walk: 345\n",
"DEBUG:root:seq: 5\n",
"DEBUG:root:walk: 456\n",
"DEBUG:root:seq: 6\n",
"INFO:root:Done\n"
]
},
{
"data": {
"text/plain": [
"['123456']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g.assemble() # simple graphs can be assembled directly: 123456\n"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "a421dc34-cfe7-463d-87dd-8bf623d24e6f",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:0: len(nset0)3\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '123', 'id': 0, 'sequence': '123', 'label': '0:123', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting 123 --> 234igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '123', 'id': 0, 'sequence': '123', 'label': '0:123', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: 234\n",
"DEBUG:root:really compacting 1234 --> 345igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '1234', 'id': 0, 'sequence': '1234', 'label': '[0] 1234', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:deleting: 345\n",
"DEBUG:root:really compacting 12345 --> 456igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '12345', 'id': 0, 'sequence': '12345', 'label': '[0] 12345', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n",
"DEBUG:root:deleting: 456\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3c50>, 0, {'name': '123456', 'id': 0, 'sequence': '123456', 'label': '[0] 123456', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 3, 'dist': 3})\n",
"DEBUG:root:1: len(nset0)0\n",
"INFO:root:no more compactable stretches\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 13,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
"g.compact()\n",
"g.viz()\n",
"\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "c5ab5086-11b5-4ab3-89e5-b12af36148fb",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:adding edge0 --> 1\n",
"DEBUG:root:adding edge1 --> 2\n",
"DEBUG:root:adding edge2 --> 3\n",
"DEBUG:root:adding edge2 --> 4\n",
"DEBUG:root:adding edge3 --> 5\n",
"DEBUG:root:adding edge4 --> 6\n",
"DEBUG:root:adding edge5 --> 7\n",
"DEBUG:root:adding edge6 --> 8\n",
"DEBUG:root:adding edge7 --> 1\n",
"DEBUG:root:adding edge8 --> 1\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 14,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
"g = DBGraph('double loop')\n",
"g.add('0ab','abc')\n",
"g.add('abc','bcd')\n",
"g.add('bcd','cde')\n",
"g.add('bcd','cdE')\n",
"g.add('cde','dea')\n",
"g.add('cdE','dEa')\n",
"g.add('dea','eab')\n",
"g.add('dEa','Eab')\n",
"g.add('eab','abc')\n",
"g.add('Eab','abc')\n",
"g.viz()\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "513dde3e-f4eb-490d-bcf6-abee1382f6c3",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:0: len(nset0)8\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 0, {'name': '0ab', 'id': 0, 'sequence': '0ab', 'label': '0:0ab', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 0, {'name': '0ab', 'id': 0, 'sequence': '0ab', 'label': '0:0ab', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:1: len(nset0)7\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'abc', 'id': 1, 'sequence': 'abc', 'label': '1:abc', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting c --> bcdigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'abc', 'id': 1, 'sequence': 'c', 'label': '1:abc', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: bcd\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'cd', 'id': 1, 'sequence': 'cd', 'label': '[1] cd', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:2: len(nset0)6\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'cde', 'id': 3, 'sequence': 'cde', 'label': '3:cde', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting e --> deaigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'cde', 'id': 3, 'sequence': 'e', 'label': '3:cde', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: dea\n",
"DEBUG:root:really compacting ea --> eabigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'ea', 'id': 3, 'sequence': 'ea', 'label': '[2] ea', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:deleting: eab\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 2, {'name': 'eab', 'id': 3, 'sequence': 'eab', 'label': '[2] eab', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n",
"DEBUG:root:3: len(nset0)3\n",
"DEBUG:root:Start compacting: igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'cdE', 'id': 4, 'sequence': 'cdE', 'label': '4:cdE', 'visited': False, 'compacted': False, 'coverage': 1, 'sum': 0, 'dist': 0})1\n",
"DEBUG:root:really compacting E --> dEaigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'cdE', 'id': 4, 'sequence': 'E', 'label': '4:cdE', 'visited': False, 'compacted': True, 'coverage': 1, 'sum': 0, 'dist': 0})\n",
"DEBUG:root:deleting: dEa\n",
"DEBUG:root:really compacting Ea --> Eabigraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'Ea', 'id': 4, 'sequence': 'Ea', 'label': '[3] Ea', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 1, 'dist': 1})\n",
"DEBUG:root:deleting: Eab\n",
"DEBUG:root:-------------- finished with node igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 3, {'name': 'Eab', 'id': 4, 'sequence': 'Eab', 'label': '[3] Eab', 'visited': True, 'compacted': True, 'coverage': 1, 'sum': 2, 'dist': 2})\n",
"DEBUG:root:4: len(nset0)0\n",
"INFO:root:no more compactable stretches\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 15,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
"g.compact()\n",
"g.viz()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "e0840c51-cf2a-4370-ab3e-ca935842afc4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:1: bubble detected:cd ---> eab\n",
"DEBUG:root:[[2], [3, 1, 2]]\n",
"INFO:root:deleting the following nodes: []\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n"
],
"text/plain": [
""
]
},
"execution_count": 16,
"metadata": {
"image/svg+xml": {
"isolated": true
}
},
"output_type": "execute_result"
}
],
"source": [
"g.remove_bubbles()\n",
"g.viz()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "94913ca2-943a-4d8a-b78f-8327ed6cb1c2",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"DEBUG:root:walk: 0ab0ab\n",
"DEBUG:root:walk: cd\n",
"DEBUG:root:seq: cd\n",
"DEBUG:root:walk: eab\n",
"DEBUG:root:seq: eab\n",
"DEBUG:root:has unvisited next: [igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'cd', 'id': 1, 'sequence': 'cd', 'label': '1:cd', 'visited': True, 'compacted': True, 'coverage': 1.0, 'sum': 1, 'dist': 1})]\n",
"DEBUG:root:has unvisited next: [igraph.Vertex(<__main__.DBGraph object at 0x7fb53c3f3d50>, 1, {'name': 'cd', 'id': 1, 'sequence': 'cd', 'label': '1:cd', 'visited': True, 'compacted': True, 'coverage': 1.0, 'sum': 1, 'dist': 1})]\n",
"DEBUG:root:walk: cd\n",
"DEBUG:root:seq: cd\n",
"DEBUG:root:walk: Eab\n",
"DEBUG:root:seq: Eab\n",
"INFO:root:Done\n"
]
},
{
"data": {
"text/plain": [
"['0abcdeabcdEab']"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g.assemble() # should give: ['0abcdeabcdEab']"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8126ad58-9951-4e3f-bc58-5981b6fc3211",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc39e14c-30eb-42d9-b4bc-d279cfd2223d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}