{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "class SuffixTrie(object):\n", " \"\"\" Encapsulates a suffix trie of a provided string t \"\"\"\n", " \n", " def __init__(self, t):\n", " \"\"\" Make suffix trie from t \"\"\"\n", " t += '$' # terminator symbol\n", " self.root = {}\n", " for i in range(len(t)): # for each suffix\n", " cur = self.root\n", " for c in t[i:]: # for each character in i'th suffix\n", " if c not in cur:\n", " cur[c] = {} # add outgoing edge if necessary\n", " cur = cur[c]\n", " \n", " def follow_path(self, s):\n", " \"\"\" Follow path given by characters of s. Return node at\n", " end of path, or None if we fall off. \"\"\"\n", " cur = self.root\n", " for c in s:\n", " if c not in cur:\n", " return None # no outgoing edge on next character\n", " cur = cur[c] # descend one level\n", " return cur\n", " \n", " def has_substring(self, s):\n", " \"\"\" Return true if s appears as a substring of t \"\"\"\n", " return self.follow_path(s) is not None\n", " \n", " def has_suffix(self, s):\n", " \"\"\" Return true if s is a suffix of t \"\"\"\n", " node = self.follow_path(s)\n", " return node is not None and '$' in node\n", " \n", " def to_dot(self):\n", " \"\"\" Return dot representation of trie to make a picture \"\"\"\n", " lines = []\n", " def _to_dot_helper(node, parid):\n", " childid = parid\n", " for c, child in node.items():\n", " lines.append(' %d -> %d [ label=\"%s\" ];' % (parid, childid+1, c))\n", " childid = _to_dot_helper(child, childid+1)\n", " return childid\n", " lines.append('digraph \"Suffix trie\" {')\n", " lines.append(' node [shape=circle label=\"\"];')\n", " _to_dot_helper(self.root, 0)\n", " lines.append('}')\n", " return '\\n'.join(lines) + '\\n'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "strie = SuffixTrie('there would have been a time for such a word')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie.has_substring('nope')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie.has_substring('would have been')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie.has_substring('such a word')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie.has_suffix('would have been')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie.has_suffix('such a word')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# might have to do this first:\n", "# %install_ext https://raw.github.com/cjdrake/ipython-magic/master/gvmagic.py\n", "%load_ext gvmagic" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "Suffix trie\n", "\n", "\n", "0\n", "\n", "\n", "\n", "1\n", "\n", "\n", "\n", "0->1\n", "\n", "\n", "A\n", "\n", "\n", "4\n", "\n", "\n", "\n", "0->4\n", "\n", "\n", "C\n", "\n", "\n", "8\n", "\n", "\n", "\n", "0->8\n", "\n", "\n", "T\n", "\n", "\n", "10\n", "\n", "\n", "\n", "0->10\n", "\n", "\n", "$\n", "\n", "\n", "2\n", "\n", "\n", "\n", "1->2\n", "\n", "\n", "T\n", "\n", "\n", "3\n", "\n", "\n", "\n", "2->3\n", "\n", "\n", "$\n", "\n", "\n", "5\n", "\n", "\n", "\n", "4->5\n", "\n", "\n", "A\n", "\n", "\n", "6\n", "\n", "\n", "\n", "5->6\n", "\n", "\n", "T\n", "\n", "\n", "7\n", "\n", "\n", "\n", "6->7\n", "\n", "\n", "$\n", "\n", "\n", "9\n", "\n", "\n", "\n", "8->9\n", "\n", "\n", "$\n", "\n", "\n", "\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%dotstr SuffixTrie(\"CAT\").to_dot()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }