{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def de_bruijn_ize(st, k):\n",
    "    \"\"\" Build the De Bruijn graph of the k-mers of a string.\n",
    "\n",
    "    Each k-mer st[i:i+k] contributes one directed edge from its\n",
    "    left k-1-mer (first k-1 characters) to its right k-1-mer\n",
    "    (last k-1 characters).\n",
    "\n",
    "    st -- the string to decompose\n",
    "    k  -- the k-mer length; must be at least 1 (k == 1 gives a\n",
    "          degenerate graph whose only node is the empty string)\n",
    "\n",
    "    Returns a pair (nodes, edges).  nodes is the set of distinct\n",
    "    k-1-mers.  edges is a list of (left, right) pairs, one per\n",
    "    k-mer in order of occurrence, so a repeated k-mer yields a\n",
    "    repeated edge.  Both are empty when st is shorter than k.\n",
    "\n",
    "    Raises ValueError if k is less than 1. \"\"\"\n",
    "    if k < 1:\n",
    "        # with k < 1 the slice bounds below can go negative and\n",
    "        # index from the end of st, giving meaningless substrings\n",
    "        raise ValueError('k must be at least 1, got %r' % (k,))\n",
    "    edges = []\n",
    "    nodes = set()\n",
    "    for i in range(len(st) - k + 1):\n",
    "        left, right = st[i:i+k-1], st[i+1:i+k]\n",
    "        edges.append((left, right))\n",
    "        nodes.add(left)\n",
    "        nodes.add(right)\n",
    "    return nodes, edges"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "nodes, edges = de_bruijn_ize(\"ACGCGTCG\", 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'AC', 'CG', 'GC', 'GT', 'TC'}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nodes "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('AC', 'CG'),\n",
       " ('CG', 'GC'),\n",
       " ('GC', 'CG'),\n",
       " ('CG', 'GT'),\n",
       " ('GT', 'TC'),\n",
       " ('TC', 'CG')]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "edges "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def visualize_de_bruijn(st, k):\n",
    "    \"\"\" Return DOT source for the De Bruijn graph of a string.\n",
    "\n",
    "    The graph comes from de_bruijn_ize(st, k).  The result is the\n",
    "    text of a graphviz digraph with one node statement per distinct\n",
    "    k-1-mer and one edge statement per k-mer, so a repeated k-mer\n",
    "    shows up as parallel edges.  Nothing is drawn here; hand the\n",
    "    string to a DOT renderer (such as the %dotstr magic). \"\"\"\n",
    "    def quote(name):\n",
    "        # Bare DOT IDs only allow letters, digits and underscores,\n",
    "        # so write every k-1-mer as a double-quoted ID, escaping\n",
    "        # the backslashes and double quotes it may contain\n",
    "        return '\"%s\"' % name.replace('\\\\', '\\\\\\\\').replace('\"', '\\\\\"')\n",
    "    nodes, edges = de_bruijn_ize(st, k)\n",
    "    lines = ['digraph \"DeBruijn graph\" {\\n']\n",
    "    for node in nodes:\n",
    "        quoted = quote(node)\n",
    "        lines.append(' %s [label=%s] ;\\n' % (quoted, quoted))\n",
    "    for src, dst in edges:\n",
    "        lines.append(' %s -> %s ;\\n' % (quote(src), quote(dst)))\n",
    "    lines.append('}\\n')\n",
    "    return ''.join(lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# might have to do this first:\n",
    "# %install_ext https://raw.github.com/cjdrake/ipython-magic/master/gvmagic.py\n",
    "%load_ext gvmagic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/svg+xml": [
       "\n", "\n", "\n", "\n", "\n", "\n",
       "DeBruijn graph\n",
       "\n", "\n",
       "GC\n",
       "\n",
       "GC\n",
       "\n", "\n",
       "CG\n",
       "\n",
       "CG\n",
       "\n", "\n",
       "GC->CG\n",
       "\n", "\n", "\n", "\n",
       "AC\n",
       "\n",
       "AC\n",
       "\n", "\n",
       "AC->CG\n",
       "\n", "\n", "\n", "\n",
       "GT\n",
       "\n",
       "GT\n",
       "\n", "\n",
       "TC\n",
       "\n",
       "TC\n",
       "\n", "\n",
       "GT->TC\n",
       "\n", "\n", "\n", "\n",
       "CG->GC\n",
       "\n", "\n", "\n", "\n",
       "CG->GT\n",
       "\n", "\n", "\n", "\n",
       "TC->CG\n",
       "\n", "\n", "\n", "\n",
       "\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%dotstr visualize_de_bruijn(\"ACGCGTCG\", 3)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}