{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "// A simple example of a substring index; mirrors example from lecture notes\n", "\n", "// we're going to extract 4 substrings like this:\n", "// t: CGTGCCTACTTACTTACAT\n", "// substring 1: CGTGC\n", "// substring 2: CCTAC\n", "// substring 3: CTTAC\n", "// substring 4: CTTAC\n", "t := \"CGTGCCTACTTACTTACAT\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "// From t, make two parallel lists: the substrings, and the offset in t at which each substring starts\n", "func substringize(t string, ln int, iv int) ([]string, []int) {\n", " // ln = length of substrings to extract\n", " // iv = distance between substrings to extract; e.g. 1 means take *every* substring\n", " strings := make([]string, 0)\n", " offsets := make([]int, 0)\n", " for i := 0; i < len(t) - ln + 1; i += iv {\n", " strings = append(strings, t[i:i+ln])\n", " offsets = append(offsets, i)\n", " }\n", " return strings, offsets\n", "}" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[CGTGC CCTAC CTTAC CTTAC] [0 4 8 12]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "substringize(\"CGTGCCTACTTACTTACAT\", 5, 4)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "// Like substringize, but uses a map data structure\n", "func mapize(t string, ln int, iv int) map[string][]int {\n", " index := make(map[string][]int)\n", " for i := 0; i < len(t) - ln + 1; i += iv {\n", " sub := t[i:i+ln]\n", " index[sub] = append(index[sub], i)\n", " }\n", " return index\n", "}" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "map[CGTGC:[0] CCTAC:[4] CTTAC:[8 12]]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "index := mapize(\"CGTGCCTACTTACTTACAT\", 
5, 4)\n", "index" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "p := \"CTTACTTA\"" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[8 12]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "// index: give me a hint where I should look for occurrences of p in t\n", "elem, ok := index[p[:5]]\n", "elem" ] } ], "metadata": { "kernelspec": { "display_name": "Go", "language": "go", "name": "gophernotes" }, "language_info": { "codemirror_mode": "", "file_extension": ".go", "mimetype": "", "name": "go", "nbconvert_exporter": "", "pygments_lexer": "", "version": "go1.11" } }, "nbformat": 4, "nbformat_minor": 2 }