{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "type Node struct {\n", " child map[byte] *Node // child pointers, key= first char of edge label\n", " id int\n", " lab string // label on edge leading to this node\n", "}" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "// Return new node with given id and incoming edge label\n", "func NewNode(id int, lab string) *Node {\n", " node := new (Node)\n", " node.child = make(map[byte] *Node)\n", " node.id = id\n", " node.lab = lab\n", " return node\n", "}" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "// Points to particular place in tree; might be partway along an edge\n", "type TreeCursor struct {\n", " node *Node // node below end of walk\n", " depth int // offset along incoming edge\n", "}\n", "\n", "// Return null tree cursor\n", "func EmptyTreeCursor() *TreeCursor {\n", " tc := new (TreeCursor)\n", " tc.node = nil\n", " tc.depth = 0\n", " return tc\n", "}\n", "\n", "// Return tree cursor with particular node and offset\n", "func NewTreeCursor(node *Node, depth int) *TreeCursor {\n", " tc := new (TreeCursor)\n", " tc.node = node\n", " tc.depth = depth\n", " return tc\n", "}" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "// Suffix tree is simply a root node\n", "type SuffixTree struct {\n", " root *Node\n", "}\n", "\n", "// Construct suffix tree in quadratic time\n", "func NewSuffixTree(t string) *SuffixTree {\n", " t = t + \"$\"\n", " st := new(SuffixTree)\n", " st.root = NewNode(0, \"\")\n", " nextId := 1\n", " for i := 1; i < len(t); i++ {\n", " cur := st.root\n", " j := i\n", " for j < len(t) {\n", " c := t[j]\n", " if child, ok := cur.child[c]; ok {\n", " k := j+1\n", " for k - j < len(child.lab) && t[k] == child.lab[k - j] {\n", " k++\n", " }\n", " if k - j == len(child.lab) { // walked entire edge\n", " cur = child\n", " j = k\n", " } else { // new internal node & leaf required\n", " cExist := child.lab[k - j]\n", " cNew := t[k]\n", " mid := NewNode(nextId, t[j:k])\n", " mid.child[cNew] = NewNode(nextId + 1, t[k:])\n", " nextId += 2\n", " mid.child[cExist] = child\n", " child.lab = child.lab[k-j:]\n", " cur.child[c] = mid\n", " }\n", " } else {\n", " cur.child[c] = NewNode(nextId, t[j:])\n", " nextId++\n", " }\n", " }\n", " }\n", " return st\n", "}\n", "\n", "// Follow path through tree and return tree cursor from end of walk\n", "func (st *SuffixTree) followPath(s string) *TreeCursor {\n", " cur := st.root\n", " for i := 0; i < len(s); {\n", " c := s[i]\n", " if child, ok := cur.child[c]; ok {\n", " j := i + 1\n", " for j - i < len(child.lab) && j < len(s) && s[j] == child.lab[j - i] {\n", " j++\n", " }\n", " if j - i == len(child.lab) {\n", " // Exhausted edge; descend\n", " cur = child\n", " i = j\n", " } else if j == len(s) {\n", " // Exhausted query string in middle of edge\n", " return NewTreeCursor(child, j-i)\n", " } else {\n", " // Fell off in middle of edge\n", " return EmptyTreeCursor()\n", " }\n", " } else {\n", " return EmptyTreeCursor()\n", " }\n", " }\n", " // Exhausted query string at internal node\n", " return NewTreeCursor(cur, 0)\n", "}\n", "\n", "// Return true iff string has 's' as substring\n", "func (st *SuffixTree) hasSubstring(s string) bool {\n", " tc := st.followPath(s)\n", " return tc.node != nil\n", "}\n", "\n", "// Return true iff string has 's' as suffix\n", "func (st *SuffixTree) hasSuffix(s string) bool {\n", " tc := st.followPath(s)\n", " if tc.node == nil {\n", " return false\n", " }\n", " if tc.depth == 0 {\n", " _, ok := tc.node.child['$']\n", " return ok\n", " } else {\n", " return tc.node.lab[tc.depth] == '$'\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "stree := NewSuffixTree(\"there would have been a time for such a word\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "false\n" ] } ], "source": [ "stree.hasSubstring(\"nope\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "true\n" ] } ], "source": [ "stree.hasSubstring(\"would have been\")" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "true\n" ] } ], "source": [ "stree.hasSubstring(\"such a word\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "true\n" ] } ], "source": [ "stree.hasSubstring(\"\")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "false\n" ] } ], "source": [ "stree.hasSuffix(\"would have been\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "true\n" ] } ], "source": [ "stree.hasSuffix(\"such a word\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "true\n" ] } ], "source": [ "stree.hasSuffix(\"\")" ] } ], "metadata": { "kernelspec": { "display_name": "Go (lgo)", "language": "go", "name": "lgo" }, "language_info": { "file_extension": "", "mimetype": "", "name": "go", "version": "" } }, "nbformat": 4, "nbformat_minor": 2 }