{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Suffix tree built with simple O(m^2)-time algorithm.\n", "class SuffixTree(object):\n", " \n", " class Node(object):\n", " def __init__(self, depth, off, ln, lab=None):\n", " self.depth = depth\n", " self.off = off # offset into T of substring\n", " self.ln = ln # length of substring\n", " self.out = {} # outgoing edges; maps characters to nodes\n", " \n", " def __init__(self, t):\n", " \"\"\" Make suffix tree, without suffix links, from s in quadratic time\n", " and linear space \"\"\"\n", " t += '$'\n", " self.t = t\n", " self.root = self.Node(0, 0, 0, None)\n", " self.root.out[t[0]] = self.Node(len(t), 0, len(t), t)\n", " self.nodes = []\n", " for i in range(1, len(t)):\n", " cur = self.root\n", " j = i\n", " while j < len(t):\n", " if t[j] in cur.out:\n", " child = cur.out[t[j]]\n", " lab = t[child.off:child.off+child.ln]\n", " k = j+1 # Walk along edge\n", " while k-j < len(lab) and t[k] == lab[k-j]:\n", " k += 1\n", " if k-j == len(lab):\n", " cur = child # exhausted the edge\n", " j = k\n", " else:\n", " # fell off in middle of edge\n", " cExist, cNew = lab[k-j], t[k]\n", " mid = self.Node(cur.depth + k-j, child.off, k-j, lab[:k-j])\n", " mid.out[cNew] = self.Node(mid.depth + len(t[k:]), k, len(t[k:]), t[k:])\n", " self.nodes.append(mid)\n", " self.nodes.append(mid.out[cNew])\n", " mid.out[cExist] = child\n", " child.off += (k-j)\n", " child.ln -= (k-j)\n", " cur.out[t[j]] = mid\n", " else:\n", " # Create a new edge hanging off of this node\n", " cur.out[t[j]] = self.Node(cur.depth + len(t[j:]), j, len(t[j:]), t[j:])\n", " self.nodes.append(cur.out[t[j]])\n", " \n", " def saLcp(self):\n", " # Return suffix array and an LCP1 array corresponding to this\n", " # suffix tree. self.root is root, self.t is the text.\n", " self.minSinceLeaf = 0\n", " sa, lcp1 = [], []\n", " def __visit(n):\n", " if len(n.out) == 0:\n", " # leaf node, record offset and LCP1 with previous leaf\n", " sa.append(len(self.t) - n.depth)\n", " lcp1.append(self.minSinceLeaf)\n", " # reset LCP1 to depth of this leaf\n", " self.minSinceLeaf = n.depth\n", " # visit children in lexicographical order\n", " for c, child in sorted(n.out.items()):\n", " __visit(child)\n", " # after each child visit, perhaps decrease\n", " # minimum-depth-since-last-leaf value\n", " self.minSinceLeaf = min(self.minSinceLeaf, n.depth)\n", " __visit(self.root)\n", " return sa, lcp1[1:]" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# example from the lecture notes\n", "st = SuffixTree('abaaba')\n", "sa, lcp1 = st.saLcp()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "([6, 5, 2, 3, 0, 4, 1], [0, 1, 1, 3, 0, 2])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sa, lcp1" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.2" } }, "nbformat": 4, "nbformat_minor": 1 }