{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def rotations(t):\n", " ''' Return list of rotations of input string t '''\n", " tt = t * 2\n", " return [ tt[i:i+len(t)] for i in range(0, len(t)) ]\n", "\n", "def bwm(t):\n", " ''' Return lexicographically sorted list of t’s rotations '''\n", " return sorted(rotations(t))\n", "\n", "def bwtViaBwm(t):\n", " ''' Given T, returns BWT(T) by way of the BWM '''\n", " return ''.join(map(lambda x: x[-1], bwm(t)))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'abba$aa'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t = 'abaaba$'\n", "b = bwtViaBwm(t)\n", "b" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def rankBwt(bw):\n", " ''' Given BWT string bw, return parallel list of B-ranks. Also\n", " returns tots: map from character to # times it appears. '''\n", " tots = dict()\n", " ranks = []\n", " for c in bw:\n", " if c not in tots:\n", " tots[c] = 0\n", " ranks.append(tots[c])\n", " tots[c] += 1\n", " return ranks, tots" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('a', 0), ('b', 0), ('b', 1), ('a', 1), ('$', 0), ('a', 2), ('a', 3)]\n" ] } ], "source": [ "ranks, tots = rankBwt(b)\n", "print zip(b, ranks) # print characters of BWT(T) in order, along with rank" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def firstCol(tots):\n", " ''' Return map from character to the range of rows prefixed by\n", " the character. '''\n", " first = {}\n", " totc = 0\n", " for c, count in sorted(tots.items()):\n", " first[c] = (totc, totc + count)\n", " totc += count\n", " return first" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'$': (0, 1), 'a': (1, 5), 'b': (5, 7)}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "firstCol(tots)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "$abaaba\n", "a$abaab\n", "aaba$ab\n", "aba$aba\n", "abaaba$\n", "ba$abaa\n", "baaba$a\n" ] } ], "source": [ "# confirm that the representation of the first column above is sensible\n", "print('\\n'.join(bwm(t)))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def reverseBwt(bw):\n", " ''' Make T from BWT(T) '''\n", " ranks, tots = rankBwt(bw)\n", " first = firstCol(tots)\n", " rowi = 0 # start in first row\n", " t = '$' # start with rightmost character\n", " while bw[rowi] != '$':\n", " c = bw[rowi]\n", " t = c + t # prepend to answer\n", " # jump to row that starts with c of same rank\n", " rowi = first[c][0] + ranks[rowi]\n", " return t" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'abaaba$'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reverseBwt(b)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'In_the_jingle_jangle_morning$'" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reverseBwt(bwtViaBwm('In_the_jingle_jangle_morning$'))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }