{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This code snippet demonstrates the speed of numba"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numba\n",
    "import numpy as np\n",
    "import scipy.spatial.distance as distance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "@numba.jit(nopython=True)\n",
    "def ngrams(string, n=3):\n",
    "    \"\"\"Return every contiguous substring of length ``n`` in ``string``.\n",
    "\n",
    "    The substrings are returned in order of their starting position.\n",
    "    The result is empty when ``string`` is shorter than ``n``.\n",
    "    \"\"\"\n",
    "    res = []\n",
    "    # + 1 so the last window, which ends on the final character, is kept.\n",
    "    for i in range(len(string) - n + 1):\n",
    "        res.append(string[i:i+n])\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "@numba.jit(nopython=True)\n",
    "def cosine_sim(u, v):\n",
    "    \"\"\"Return the cosine distance between two equal-length vectors.\n",
    "\n",
    "    Despite the name, the value is ``1 - (u . v) / (|u| * |v|)``, i.e. a\n",
    "    distance: it is 0 when both vectors point the same way. This is the\n",
    "    quantity the notebook compares against ``distance.cosine``.\n",
    "\n",
    "    Raises ValueError if ``u`` and ``v`` differ in length.\n",
    "    \"\"\"\n",
    "    # Checked up front: the loop indexes v by positions of u, and Numba\n",
    "    # does not bounds-check array indexing by default.\n",
    "    if len(u) != len(v):\n",
    "        raise ValueError(\"u and v must have the same length\")\n",
    "    # Start from floats: np.sqrt below returns floats, so each accumulator\n",
    "    # keeps a single type for the whole function.\n",
    "    ulen, vlen, s = 0.0, 0.0, 0.0\n",
    "    for k in range(len(u)):\n",
    "        i = u[k]\n",
    "        j = v[k]\n",
    "        s += i * j\n",
    "        ulen += i ** 2\n",
    "        vlen += j ** 2\n",
    "    ulen = np.sqrt(ulen)\n",
    "    vlen = np.sqrt(vlen)\n",
    "    return 1 - s / (ulen * vlen)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Show the calculation is correct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.01613008990009257"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cosine_sim([1,2], [3,4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.01613008990009257"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "distance.cosine([1, 2], [3, 4])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It's faster than scipy!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "11.3 µs ± 993 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit cosine_sim([1,2], [3,4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26.8 µs ± 273 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%timeit distance.cosine([1, 2], [3, 4])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}