{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Not a great way to build a suffix array, but we'll use it\n", "# for the small examples here\n", "def naiveBuildSA(t):\n", " satups = sorted([(t[i:], i) for i in range(len(t))])\n", " return list(map(lambda x: x[1], satups))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Simple function calculating LCP of two string\n", "def lcp(x, y):\n", " for i in range(min(len(x), len(y))):\n", " if x[i] != y[i]: return i\n", " return min(len(x), len(y))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lcp('start', 'stark')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lcp('start', 'star')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lcp('yes', 'no')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Naive way to calculate LCP1 array given string and its suffix array\n", "def naiveLCP1(t, sa):\n", " return [ lcp(t[sa[i]:], t[sa[i+1]:]) for i in range(len(sa)-1) ]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 1, 3, 0, 2]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t = 'abaaba$'\n", "naiveLCP1(t, naiveBuildSA(t))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 8, 1, 5, 1, 3, 0, 7, 0, 4, 0, 2, 0, 6]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t = 'abracadabracada$'\n", "naiveLCP1(t, naiveBuildSA(t))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[15, 14, 7, 0, 10, 3, 12, 5, 8, 1, 11, 4, 13, 6, 9, 2]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "naiveBuildSA(t)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Calculates (l, c) LCPs and (c, r) LCPs from LCP1 array. Returns pair where\n", "# first element is list of LCPs for (l, c) combos and second is LCPs for\n", "# (c, r) combos.\n", "def precomputeLcps(lcp1):\n", " llcp, rlcp = [None] * len(lcp1), [None] * len(lcp1)\n", " lcp1 += [0]\n", " def precomputeLcpsHelper(l, r):\n", " if l == r-1: return lcp1[l]\n", " c = (l + r) // 2\n", " llcp[c-1] = precomputeLcpsHelper(l, c)\n", " rlcp[c-1] = precomputeLcpsHelper(c, r)\n", " return min(llcp[c-1], rlcp[c-1])\n", " precomputeLcpsHelper(0, len(lcp1))\n", " return llcp, rlcp" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "([0, 0, 8, 0, 5, 1, 3, 0, 7, 0, 4, 0, 2, 0, 6],\n", " [1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "precomputeLcps(naiveLCP1(t, naiveBuildSA(t)))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.14" } }, "nbformat": 4, "nbformat_minor": 1 }