{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import (\n", " \"fmt\"\n", " \"strings\"\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "type Node struct {\n", " child map[byte] int\n", "}\n", "\n", "type SuffixTrie struct {\n", " t string\n", " nodes []*Node\n", "}" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "func NewNode() *Node {\n", " node := new (Node)\n", " node.child = make(map[byte] int)\n", " return node\n", "}\n", "\n", "func NewSuffixTrie(t string) *SuffixTrie {\n", " st := new(SuffixTrie)\n", " st.nodes = append(st.nodes, NewNode())\n", " for i := 0; i < len(t); i++ {\n", " cur := 0\n", " for _, c := range []byte(t[i:]) {\n", " if node, ok := st.nodes[cur].child[c] ; ok {\n", " cur = node\n", " } else {\n", " st.nodes[cur].child[c] = len(st.nodes)\n", " cur = len(st.nodes)\n", " st.nodes = append(st.nodes, NewNode())\n", " }\n", " }\n", " st.nodes[cur].child['$'] = len(st.nodes)\n", " st.nodes = append(st.nodes, NewNode())\n", " }\n", " return st\n", "}" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "func (st *SuffixTrie) followPath(s string) int {\n", " // Follow path given by characters of s. Return node at\n", " // end of path, or -1 if we fall off\n", " cur := 0\n", " for _, c := range []byte(s) {\n", " if node, ok := st.nodes[cur].child[c] ; ok {\n", " cur = node\n", " } else {\n", " return -1\n", " }\n", " }\n", " return cur\n", "}" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "func (st *SuffixTrie) hasSubstring(s string) bool {\n", " // Return true of s appears as a substring of t\n", " return st.followPath(s) >= 0\n", "}" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "func (st *SuffixTrie) hasSuffix(s string) bool {\n", " // Return true of s appears as a substring of t\n", " cur := st.followPath(s)\n", " _, ok := st.nodes[cur].child['$']\n", " return cur >= 0 && ok\n", "}" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "func (st *SuffixTrie) toDot() string {\n", " // Return dot representation of trie to make a picture\n", " lines := []string{\"igraph \\\"Suffix trie\\\" {\",\n", " \" node [shape=circle label=\\\"\\\"];\"}\n", " todo := []int{0}\n", " for len(todo) > 0 {\n", " node := todo[0]\n", " todo = todo[1:]\n", " for c, child := range st.nodes[node].child {\n", " lines = append(lines, fmt.Sprintf(\" %d -> %d [ label=\\\"%c\\\" ];\", node, child, c))\n", " todo = append(todo, child)\n", " }\n", " }\n", " lines = append(lines, \"}\")\n", " return strings.Join(lines, \"\\n\")\n", "}" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "strie := NewSuffixTrie(\"there would have been a time for such a word\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "false" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie.hasSubstring(\"nope\")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "true" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie.hasSubstring(\"would have been\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "true" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie.hasSubstring(\"such a word\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "false" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie.hasSuffix(\"would have been\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "true" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie.hasSuffix(\"such a word\")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "igraph \"Suffix trie\" {\n", " node [shape=circle label=\"\"];\n", " 0 -> 1 [ label=\"C\" ];\n", " 0 -> 5 [ label=\"A\" ];\n", " 0 -> 8 [ label=\"T\" ];\n", " 1 -> 2 [ label=\"A\" ];\n", " 5 -> 6 [ label=\"T\" ];\n", " 8 -> 9 [ label=\"$\" ];\n", " 2 -> 3 [ label=\"T\" ];\n", " 6 -> 7 [ label=\"$\" ];\n", " 3 -> 4 [ label=\"$\" ];\n", "}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "strie2 := NewSuffixTrie(\"CAT\")\n", "strie2.toDot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Currently these Go notebooks can't render diagrams; see Python version for the actual tree diagram." ] } ], "metadata": { "kernelspec": { "display_name": "Go", "language": "go", "name": "gophernotes" }, "language_info": { "codemirror_mode": "", "file_extension": ".go", "mimetype": "", "name": "go", "nbconvert_exporter": "", "pygments_lexer": "", "version": "go1.11" } }, "nbformat": 4, "nbformat_minor": 2 }