{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# A basic bit vector structure\n", "\n", "This is the basic bit vector class we use in discussing the Bloom filter -- we've broken it out into a separate module in order to keep the Bloom filter notebook cleaner." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy\n", "\n", "class BitVector(object):\n", " def __init__(self, size):\n", " self._size = size\n", " ct = size % 64 == 0 and (size / 64) or (size / 64 + 1)\n", " self._entries = numpy.zeros(int(ct), numpy.uint64)\n", " \n", " def __len__(self):\n", " return self._size\n", " \n", " def __getitem__(self, key):\n", " k = int(key)\n", " return (self._entries[int(k / 64)] & numpy.uint64(1 << (k % 64))) > 0\n", " \n", " def __setitem__(self, key, value):\n", " k = int(key)\n", " if value:\n", " update = numpy.uint64(1 << key % 64)\n", " self._entries[int(k / 64)] = self._entries[int(k / 64)] | update\n", " else:\n", " update = numpy.uint64(1 << key % 64)\n", " self._entries[int(k / 64)] = self._entries[int(k / 64)] ^ update\n", " \n", " def merge_from(self, other):\n", " numpy.bitwise_or(self._entries, other._entries, self._entries)\n", " \n", " def intersect_from(self, other):\n", " numpy.bitwise_and(self._entries, other._entries, self._entries)\n", " \n", " def dup(self):\n", " result = BitVector(self._size)\n", " result.merge_from(self)\n", " return result\n", " \n", " def intersect(self, other):\n", " result = BitVector(self._size)\n", " numpy.bitwise_and(self._entries, other._entries, result._entries)\n", " return result\n", " \n", " def union(self, other):\n", " result = BitVector(self._size)\n", " numpy.bitwise_or(self._entries, other._entries, result._entries)\n", " return result\n", " \n", " def count_set_bits(self):\n", " \"\"\" Count the number of bits set in this vector. \n", " There are absolutely better ways to do this\n", " but this implementation is suitable for\n", " occasional use. \"\"\"\n", " def set_bits(i):\n", " result = 0\n", " i = int(i)\n", " while i:\n", " result += (i & 1)\n", " i >>= 1\n", " return result\n", " return sum([set_bits(x) for x in self._entries])\n", " " ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.6", "language": "python", "name": "jupyter" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }