{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Testing different schemes for encoding ids" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "const convertHrtime = require('convert-hrtime');" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "const d3array = require('d3-array')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Timing code" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "export function timeit(n: number, f: any, args: any[]) {\n", " let sum = 0.0;\n", " for (i=0; i> 3;\n", " }\n", "\n", " export\n", " function idPathAt(id: string, i: number): number {\n", " let j = i << 3;\n", " let a = id.charCodeAt(j + 0);\n", " let b = id.charCodeAt(j + 1);\n", " let c = id.charCodeAt(j + 2);\n", " return a * 0x100000000 + b * 0x10000 + c;\n", " }\n", "\n", " export\n", " function idVersionAt(id: string, i: number): number {\n", " let j = i << 3;\n", " let a = id.charCodeAt(j + 3);\n", " let b = id.charCodeAt(j + 4);\n", " let c = id.charCodeAt(j + 5);\n", " return a * 0x100000000 + b * 0x10000 + c;\n", " }\n", "\n", " export\n", " function idStoreAt(id: string, i: number): number {\n", " let j = i << 3;\n", " let a = id.charCodeAt(j + 6);\n", " let b = id.charCodeAt(j + 7);\n", " return a * 0x10000 + b;\n", " }\n", "\n", " export\n", " function randomPath(min: number, max: number): number {\n", " return min + Math.round(Math.random() * Math.sqrt(max - min));\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "export\n", "function createDuplexId(version: number, store: number): string {\n", " // Split the version into 16-bit values.\n", " let vc = version & 0xFFFF;\n", " let vb = (((version - vc) / 0x10000) | 0) & 0xFFFF;\n", " let va = (((version - vb - vc) / 0x100000000) | 0) & 0xFFFF;\n", "\n", " // Split the store id into 16-bit values.\n", " let sb = store & 0xFFFF;\n", " let sa = (((store - sb) / 0x10000) | 0) & 0xFFFF;\n", "\n", " // Convert the parts into a string identifier duplex.\n", " return String.fromCharCode(va, vb, vc, sa, sb);\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "export\n", "function createTriplexId(version: number, store: number, lower: string, upper: string): string {\n", " // The maximum path in a triplex id.\n", " const MAX_PATH = 0xFFFFFFFFFFFF;\n", "\n", " // Set up the variable to hold the id.\n", " let id = '';\n", "\n", " // Fetch the triplet counts of the ids.\n", " let lowerCount = lower ? Private.idTripletCount(lower) : 0;\n", " let upperCount = upper ? Private.idTripletCount(upper) : 0;\n", "\n", " // Iterate over the id triplets.\n", " for (let i = 0, n = Math.max(lowerCount, upperCount); i < n; ++i) {\n", " // Fetch the lower identifier triplet, padding as needed.\n", " let lp: number;\n", " let lc: number;\n", " let ls: number;\n", " if (i >= lowerCount) {\n", " lp = 0;\n", " lc = 0;\n", " ls = 0;\n", " } else {\n", " lp = Private.idPathAt(lower, i);\n", " lc = Private.idVersionAt(lower, i);\n", " ls = Private.idStoreAt(lower, i);\n", " }\n", "\n", " // Fetch the upper identifier triplet, padding as needed.\n", " let up: number;\n", " let uc: number;\n", " let us: number;\n", " if (i >= upperCount) {\n", " up = upperCount === 0 ? MAX_PATH + 1 : 0;\n", " uc = 0;\n", " us = 0;\n", " } else {\n", " up = Private.idPathAt(upper, i);\n", " uc = Private.idVersionAt(upper, i);\n", " us = Private.idStoreAt(upper, i);\n", " }\n", "\n", " // If the triplets are the same, copy the triplet and continue.\n", " if (lp === up && lc === uc && ls === us) {\n", " id += Private.createTriplet(lp, lc, ls);\n", " continue;\n", " }\n", "\n", " // If the triplets are different, the well-ordered identifiers\n", " // assumption means that the lower triplet compares less than\n", " // the upper triplet. The task now is to find the nearest free\n", " // path slot among the remaining triplets.\n", "\n", " // If there is free space between the path portions of the\n", " // triplets, select a new path which falls between them.\n", " if (up - lp > 1) {\n", " let np = Private.randomPath(lp + 1, up - 1);\n", " id += Private.createTriplet(np, version, store);\n", " return id.slice();\n", " }\n", "\n", " // Otherwise, copy the left triplet and reset the upper count\n", " // to zero so that the loop chooses the nearest available path\n", " // slot after the current lower triplet.\n", " id += Private.createTriplet(lp, lc, ls);\n", " upperCount = 0;\n", " }\n", "\n", " // If this point is reached, the lower and upper identifiers share\n", " // the same path but diverge based on the version or store id. It is\n", " // safe to insert anywhere in an extra triplet.\n", " let np = Private.randomPath(1, MAX_PATH);\n", " id += Private.createTriplet(np, version, store);\n", " return id.slice();\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "export\n", "function createTriplexIds(n: number, version: number, store: number, lower: string, upper: string): string[] {\n", " let ids: string[] = [];\n", "\n", " while (ids.length < n) {\n", " let id = createTriplexId(version, store, lower, upper);\n", " ids.push(id);\n", " lower = id;\n", " }\n", "\n", " return ids;\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Base64 encoding" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "export function encodeBase64(input: string): string {\n", " const buffer = Buffer.from(input);\n", " return buffer.toString('base64');\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "export function decodeBase64(input: string): string {\n", " return Buffer.from(input, 'base64').toString()\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Regular expression (Ian's PR)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "const HS_L = '\\uD800';\n", "const HS_U = '\\uDBFF';\n", "const LS_L = '\\uDC00';\n", "const LS_U = '\\uDFFF';\n", "const LS_REGEX = new RegExp(`([${LS_L}-${LS_U}])`, 'g');\n", "const UNPAIRED_HS_REGEX = new RegExp(\n", " `([${HS_L}-${HS_U}])(?![${LS_L}-${LS_U}])`,\n", " 'g',\n", ");\n", "const PAIRED_LS_REGEX = new RegExp(`X${HS_L}([${LS_L}-${LS_U}])`, 'g');\n", "const PAIRED_HS_REGEX = new RegExp(`([${HS_L}-${HS_U}])${LS_L}X`, 'g');\n", "\n", "export\n", "function stripSurrogates(id: string): string {\n", " return id.replace(PAIRED_LS_REGEX, '$1').replace(PAIRED_HS_REGEX, '$1');\n", "}\n", "\n", "export\n", "function generateIdString(str: string): string {\n", " str = str.replace(LS_REGEX, `X${HS_L}$1`);\n", " str = str.replace(UNPAIRED_HS_REGEX, `$1${LS_L}X`);\n", " return str;\n", "}\n", "\n", "export\n", "function stringToCharCodes(s: string): number[] {\n", " result = new Array();\n", " for (i=0; i {\n", " return timeit(100, encodeBase64, [item])\n", "})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "meanA = d3array.mean(timesA)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "timesB = ids.map(item => {\n", " return timeit(100, generateIdString, [item])\n", "})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "meanB = d3array.mean(timesB)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "meanB/meanA" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "patchIds = createTriplexIds(2**13, 1, 1, ids[0], ids[1])" ] } ], "metadata": { "kernelspec": { "display_name": "jp-Babel (Node.js)", "language": "babel", "name": "babel" }, "language_info": { "file_extension": ".js", "mimetype": "application/javascript", "name": "javascript", "version": "12.12.0" } }, "nbformat": 4, "nbformat_minor": 4 }