This notebook demonstrates the speed of Numba-compiled Python code, using SciPy as the baseline.

In [1]:
import numba
import numpy as np
import scipy.spatial.distance as distance

In [2]:
@numba.jit(nopython=True)
def ngrams(string, n=3):
    """Return every contiguous substring of length ``n`` found in ``string``.

    Parameters
    ----------
    string : str
        Text to slice into n-grams.
    n : int, optional
        Width of each n-gram (default 3).

    Returns
    -------
    list of str
        The n-grams in left-to-right order. The list is empty when
        ``string`` is shorter than ``n``.
    """
    res = []
    # A string of length L contains L - n + 1 windows of width n. The "+ 1"
    # keeps the final window, which a bare ``len(string) - n`` bound drops
    # (e.g. "abcd" with n=3 must yield both "abc" and "bcd").
    for i in range(len(string) - n + 1):
        res.append(string[i:i+n])
    return res

In [3]:
@numba.jit(nopython=True)
def cosine_sim(u, v):
    """Return the cosine distance ``1 - (u . v) / (|u| * |v|)``.

    Despite the name, the value returned is a *distance*: it is 0 when the
    two vectors point in the same direction and grows as they diverge.

    Parameters
    ----------
    u, v : sequence of numbers
        Vectors of equal length, indexable by integer position.

    Returns
    -------
    float
        One minus the cosine similarity of ``u`` and ``v``.

    Raises
    ------
    ValueError
        If ``u`` and ``v`` differ in length.

    Notes
    -----
    All-zero vectors get no special handling: the final division then has
    a zero denominator.
    """
    # The loop below walks ``len(u)`` positions and indexes ``v`` with the
    # same counter; without this check a shorter ``v`` would be indexed past
    # its end and a longer one would be silently truncated.
    if len(u) != len(v):
        raise ValueError("u and v must have the same length")

    # Float accumulators keep each variable's type fixed for the whole
    # function and avoid integer overflow when the inputs are large integers.
    ulen, vlen, s = 0.0, 0.0, 0.0
    for k in range(len(u)):
        i = u[k]
        j = v[k]
        s += i * j          # running dot product
        ulen += i ** 2      # running squared norm of u
        vlen += j ** 2      # running squared norm of v
    ulen = np.sqrt(ulen)
    vlen = np.sqrt(vlen)
    return 1 - s / (ulen * vlen)

Show that the calculation is correct

In [6]:
# Cosine distance of (1, 2) and (3, 4) from the Numba-compiled function.
cosine_sim([1,2], [3,4])

0.01613008990009257

In [7]:
# Reference value from scipy.spatial.distance for the same pair of vectors.
distance.cosine([1, 2], [3, 4])

0.01613008990009257

It's faster than scipy!

In [8]:
# Time the Numba-compiled implementation.
# NOTE(review): the arguments are Python lists, so any per-call argument
# conversion is presumably part of the measurement; consider NumPy arrays
# for a like-for-like comparison.
%timeit cosine_sim([1,2], [3,4])

11.3 µs ± 993 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [9]:
# Time the SciPy implementation on the same Python-list inputs.
%timeit distance.cosine([1, 2], [3, 4])

26.8 µs ± 273 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
