{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Using statistics to historically compare notebooks and cells\n", "\n", "This notebook demonstrates a use of [2018-08-25-Notebook-git-histories-as-dataframes.ipynb](2018-08-25-Notebook-git-histories-as-dataframes.ipynb) to quantify changes in notebook source code over time.\n", "\n", "> We should include \"git distances\"." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ " from .__Notebook_git_histories_as_dataframes import get_history\n", " from pandas import DataFrame, concat, Series, np\n", " from matplotlib.pyplot import gca, figure, spy, gcf\n", " from toolz import excepts\n", " import sys\n", " from Levenshtein import distance, hamming, jaro, ratio" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "https://rawgit.com/ztane/python-Levenshtein/master/docs/Levenshtein.html\n", "\n", " !conda install -yc conda-forge python-levenshtein" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "history = get_history('..', 'deathbeds/2018-06-19-String-Node-Transformer.ipynb')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "sources = history.source.apply(''.join).dropna()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For each of the python-Levenshtein distance functions we sample, we compose a distance matrix by vectorizing the function over numpy arrays. Each distance matrix compares the source text of each cell within each revision to quantify a nearness metric." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
distance...ratio
committed_datetime2018-08-19 17:50:19+00:00...2018-07-02 22:09:37+00:00
id0123456789...21222324252627282930
committed_datetimeid
2018-07-07 17:02:56+00:0022354167562484012366113484955...0000000000
2018-07-02 22:09:37+00:0022353168562474012345913494956...0100000000
2018-08-14 03:45:00+00:00263341671212283602251261294124126...0000000000
2018-07-02 22:09:37+00:0017386190512744332615913974451...0000000000
2018-08-18 00:23:08+00:00835916647244403234501356043...0000000000
2018-08-19 17:50:19+00:0019378182492674252515713853946...0000000000
2018-07-03 20:04:16+00:0028386190492734252606213894254...0000000100
2018-07-07 12:54:17+00:00130101632043482071571261166163...0000000000
2018-07-03 20:04:16+00:0012354167512484062345513614752...0000000000
2018-07-07 17:02:56+00:0013379183502654232525713874248...0000000000
\n", "

10 rows × 1408 columns

\n", "
" ], "text/plain": [ " distance \\\n", "committed_datetime 2018-08-19 17:50:19+00:00 \n", "id 0 1 2 3 4 \n", "committed_datetime id \n", "2018-07-07 17:02:56+00:00 22 354 167 56 248 401 \n", "2018-07-02 22:09:37+00:00 22 353 168 56 247 401 \n", "2018-08-14 03:45:00+00:00 26 334 167 121 228 360 \n", "2018-07-02 22:09:37+00:00 17 386 190 51 274 433 \n", "2018-08-18 00:23:08+00:00 8 359 166 47 244 403 \n", "2018-08-19 17:50:19+00:00 19 378 182 49 267 425 \n", "2018-07-03 20:04:16+00:00 28 386 190 49 273 425 \n", "2018-07-07 12:54:17+00:00 1 301 0 163 204 348 \n", "2018-07-03 20:04:16+00:00 12 354 167 51 248 406 \n", "2018-07-07 17:02:56+00:00 13 379 183 50 265 423 \n", "\n", " ... \\\n", "committed_datetime ... \n", "id 5 6 7 8 9 ... \n", "committed_datetime id ... \n", "2018-07-07 17:02:56+00:00 22 236 61 1348 49 55 ... \n", "2018-07-02 22:09:37+00:00 22 234 59 1349 49 56 ... \n", "2018-08-14 03:45:00+00:00 26 225 126 1294 124 126 ... \n", "2018-07-02 22:09:37+00:00 17 261 59 1397 44 51 ... \n", "2018-08-18 00:23:08+00:00 8 234 50 1356 0 43 ... \n", "2018-08-19 17:50:19+00:00 19 251 57 1385 39 46 ... \n", "2018-07-03 20:04:16+00:00 28 260 62 1389 42 54 ... \n", "2018-07-07 12:54:17+00:00 1 207 157 1261 166 163 ... \n", "2018-07-03 20:04:16+00:00 12 234 55 1361 47 52 ... \n", "2018-07-07 17:02:56+00:00 13 252 57 1387 42 48 ... 
\n", "\n", " ratio \\\n", "committed_datetime 2018-07-02 22:09:37+00:00 \n", "id 21 22 23 24 25 26 27 28 \n", "committed_datetime id \n", "2018-07-07 17:02:56+00:00 22 0 0 0 0 0 0 0 0 \n", "2018-07-02 22:09:37+00:00 22 0 1 0 0 0 0 0 0 \n", "2018-08-14 03:45:00+00:00 26 0 0 0 0 0 0 0 0 \n", "2018-07-02 22:09:37+00:00 17 0 0 0 0 0 0 0 0 \n", "2018-08-18 00:23:08+00:00 8 0 0 0 0 0 0 0 0 \n", "2018-08-19 17:50:19+00:00 19 0 0 0 0 0 0 0 0 \n", "2018-07-03 20:04:16+00:00 28 0 0 0 0 0 0 0 1 \n", "2018-07-07 12:54:17+00:00 1 0 0 0 0 0 0 0 0 \n", "2018-07-03 20:04:16+00:00 12 0 0 0 0 0 0 0 0 \n", "2018-07-07 17:02:56+00:00 13 0 0 0 0 0 0 0 0 \n", "\n", " \n", "committed_datetime \n", "id 29 30 \n", "committed_datetime id \n", "2018-07-07 17:02:56+00:00 22 0 0 \n", "2018-07-02 22:09:37+00:00 22 0 0 \n", "2018-08-14 03:45:00+00:00 26 0 0 \n", "2018-07-02 22:09:37+00:00 17 0 0 \n", "2018-08-18 00:23:08+00:00 8 0 0 \n", "2018-08-19 17:50:19+00:00 19 0 0 \n", "2018-07-03 20:04:16+00:00 28 0 0 \n", "2018-07-07 12:54:17+00:00 1 0 0 \n", "2018-07-03 20:04:16+00:00 12 0 0 \n", "2018-07-07 17:02:56+00:00 13 0 0 \n", "\n", "[10 rows x 1408 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = concat({\n", " callable.__name__: DataFrame(\n", " np.vectorize(\n", " excepts(ValueError, callable, lambda e: 1000)\n", " )(*np.meshgrid(*[sources.values]*2)).astype(int),\n", " sources.index, sources.index)\n", " for callable in (distance, hamming, jaro, ratio)\n", "}, axis=1); df.sample(10)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "from pandas import Series" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "connections = Series(\n", " df['distance'].values.ravel(),\n", " list(map(np.ravel, np.meshgrid(*[df.index]*2))))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# A distribution of distances." 
] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "connections.hist(bins=100, figsize=(20, 5))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "78650ea8d7bf48f0995954a40bddc653", "version_major": 2, "version_minor": 0 }, "text/plain": [ "interactive(children=(IntSlider(value=500, description='cutoff', max=1000, step=5), Output()), _dom_classes=('…" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "if __name__ == \"__main__\":\n", " from ipywidgets import interact\n", " @interact\n", " def _(cutoff=(0, 1000, 5)): spy(df['distance'] < cutoff); gcf().set_size_inches(15, 15)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## View the sparsity of the matrix." ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAQoAAAEHCAYAAABWVQgXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAHk9JREFUeJztnXGsJVV9xz/frrBatQLyIOuyK2jXKJq6sK+41cZaMArUZjGBdjXRjaFZq5Boo1XUpGpTU21UEtIWuwZktVagKmGj2LpFjDUp4Hu6rruuyCq4rLtlnyIItVLBX/+YMzrcve/d++49587v3Dmf5OXdO3fuOd+ZOfd7fuc3Z2ZkZhQKhcJS/EbbAgqFgn+KURQKhYEUoygUCgMpRlEoFAZSjKJQKAykGEWhUBhIMYrCY5C0V9JL2tZR8IXKPIpCoTCIElEUxkbS49rWUEhLMYrCY5B0t6SXSjpL0n9Jul/SYUl/L+nYxnom6RJJdwJ3hmUvlPQ1SQ+E/y9sbUMKUSlGUViMR4G/AE4Efg84B3hjzzoXAC8ATpd0AvB54ArgqcCHgc9LeurEFBeSUYyi0BczmzezW83sETO7G/gn4A96VvtbM7vPzP4X+CPgTjP7RPjOp4DvAH88WeWFFJSxZaEvkp5FFRXMAr9J1Vbme1a7p/H6acAPej7/AbA6lcbC5CgRRWExrqSKCNaZ2W8B7wTUs07zlNkh4Ok9n68FfphMYWFiFKMoLMaTgZ8CD0l6NvCGAevfBDxL0qslPU7SnwKnA59LrLMwAYpRFBbjrcCrgQeBjwLXLbWymf0YeAXwFuDHwNuAV5jZjxLrLEwCM2v1DzgXuAPYD1w2gfruBr4F7ALmwrITgJ1Up/l2AsdHqutq4Aiwp7Gsb11UYf0VYT/sBs5MUPd7qIYCu8Lf+Y3P3hHq/gXwljHqXQPcAuwD9gJvmtR2L1H3MNt9B/DyMep+PHA78M1Q93vD8tOA28J2XwccG5avDO/3h89PTVD3NcBdje1eP+o+T/qjHGIDVwDfA54BHBs29PTEdd4NnNiz7O8IJgVcBnwgUl0vBs7s+bH2rQs4H/hCOIgbgdsS1P0e4K191j097PvVwM+pkpArRqx3Vd3wqIYv3w3lJ9/uJeoetN0rww/6e2Nst4AnhdfHhB//RuB6YHNY/hHgDeH1G4GPhNebgevG2O7F6r4GuLDP+sve520PPc4C9pvZ983s/4BrgU0t6NgEbA+vt1PNDxgbM/sKcN+QdW0CPm4VtwLHSVoVue7F2AT8J1VvdAVVEvOsEes9bGZfD68fpOrdVzOB7V6i7sXYBFxrZg+b2V1UPeyo221m9lB4e0z4M+Bs4NNhee921/vj08A5knqTxePWvRjL3udtG8VqHnuK7SDpT6cZ8EVJ85K2hmUnm9lhqBobcFLC+hera1L74lJJuyVdLen4Rt23mtlxZva2WHVLOhU4g6qHm+h299QNi293tLolrZC0i2rIt5MqQrnfzB7pU/6v6g6fP0A1US1K3WZWb/f7wnZfLmllb919dPWlbaPo56Cpr1J7kZmdCZwHXCLpxYnrG5ZJ7IsrgWcC64HDwIdS1S3pScBngDeb2U+XWnUCdU9ku83sUTNbD5xCFZk8Z4nyk9Yt6XlU+ZdnA79LlSN6+6h1t20UB6kSUDWnUJ2PT4aZHQr/jwA3UB3Qe+vQK/w/klDCYnUl3xdmdm9oUL+kOpNRh9lR65Z0DNUP9ZNm9tmweCLb3a/uSW13jZndD3yZavx/XOOiuWb5v6o7fP4Uhh8qDlP3uWEoZmb2MPAxxtjuto3ia8A6SaeFC442AztSVSbpiZKeXL8GXgbsCXVuCattAW5MpWGJunYAr1XFRuCBOlSPRc849JVU217XvVnSSkmnAe
uosuij1CHgKmCfmX248VHy7V6s7glt94yk48LrJwAvpcqR3AJcGFbr3e56f1wIfMlCpjFS3d9pGLOociPN7V7ePh810xrrjyoD+12q8dy7Etf1DKosd30a6V1h+VOBm6lOYd0MnBCpvk9Rhbq/oHLxixeriyoc/IewH74FzCao+xOh7N2hsaxqrP+uUPcdwHlj1Pv7VGHsbhqnIyex3UvUPYnt/h3gG6GOPcBfNdrc7VSJ0n8FVobljw/v94fPn5Gg7i+F7d4D/DO/PjOy7H1eblxTKBQG0vbQo1AoZEAxikKhMJBiFIVCYSDFKAqFwkCSGYWkcyXdIWm/pMuGWH/roHVS0Fa9pe5Sd051JzEKSSuoTr+cR3XhzasknT7ga23txNYOXqm71J1L3akiCi8XexUKhQgkmUch6UKqKaR/Ft6/BniBmV26yPq2du1aZmZmotQ/Pz/Phg0bhlp3YWEhWr3LYX5+npjbvFyG2e7l7MfYdQ/LcjW2dbw91H3gwIEfmdlIAlLdXHfgRSdhzLQVYO3atRw4cIAf/KD33qy+kIT3CWo5aCy0g6SRf2Cphh4DLzoxs21mNmtmszMzM9U00dEux1+SmGWm0hiTHDQW8iOVUYx0sVeKRh67d02h0fs2Q3yNKfCoMaamNrcvydDDzB6RdCnw71S3u7vazPYO+V334XMK8/FO0TgaMTW1uX3JHgBkZjdR3cJ9lO9GNwvv5gNFYyw8avSoaTm4nZkZO8RPdZC6qNFjiN/Eo8bYmia9fW6NAnwe8F66qLGLhhajrNjDkEm2O9dGAfkkD3PQGBvvGmMel9jD4BhMcijj3iggjx4xB43gu9eu8azRW2QBkxmGZGEUUHrtWORwxsazRo8JyUloysYooPTaMfFuuinwqNFbtLMYWRkF5JM89FxeijI99rS9eNSYS7STnVFAHsOQFBSNcfCo0aOmJlkaBXR3GJKCHIZKMfGa4PSmqUm2RgF5jN+7+MPJxdBiluXtbEjsY5q1UUC5kCwWHsfvvXjW6FFbTE3ZGwWUXjsm3vdjoR2mwiigu712FzWmwKNGT5qmxiggXa/t2YBy0SjJvUZveJo27uLZo5KsXFLuj6JxupA0b2azo3zXRUQR+wauuQxDctAYG88aPYX63nBhFCnIIXmYg0aY/hC/xrO2tnFlFKXXjkMOGgt54cooSq8dhxwmoqUoLyaeZ0m2gSujaJLDDycmORhaDhrB3z0jcmh/g3BrFF2dHZmDxth41ujxblRtmI5bo0hBDj1iDhqhOxGfx/xMG5rGul2/pLuBB4FHgUfMbFbSCcB1wKnA3cCfmNlPxqwn2s6pG2WK25F51xiT2BpTGpq3/ehR0yBiRBR/aGbrGxM5LgNuNrN1wM3h/VjkMAzJQaP3bQbfGmMOQ2KVNalILMXQYxOwPbzeDlwQq+CuhLs1uQxDYuJZo8e7UU1qf41rFAZ8UdK8qqeTA5xsZocBwv+T+n1R0lZJc5LmFhYWhqus9NpR8G6Q4FujR22pNY37SMEXmdkhSScBOyV9Z9gvmtk2YBvA7Oxsa91IF8fauWj0OpaPrS1GWan301gRhZkdCv+PADcAZwH3SloFEP4fGVdkP8owxB+5DJXKPIvlM7JRSHqipCfXr4GXAXuAHcCWsNoW4MZxRfajq8OQHDR6x6NGj5qajDP0OBm4ITS0xwH/Ymb/JulrwPWSLgYOABeNL3My5DAMyUFjYfoY2SjM7PvA8/ss/zFwzjiilkPscWyKsXEKjbHxmg9o4lmjR20xNWU/M7PMjoxDDheSeR7Lx9bmLY+SvVE06UqjrMnB0HLQCP5+mN7a31QZRUlwxiGHBGcOGsfFk6apMgrw/0NMNX3cu0ZFvrmutx7XO+Puq6kzitqFPTfKnDTGLM+7xmk2nnH3lRuj8B5KdvVCsth4Cqd78aytbdwYRcm6xyEHjYX8cGMU0N1e2/s2Qx5huUeN3k6Zjooro4A8wj/vY+0UFI2j4X2i3b
C4M4qU5DAM8R6tpMCzRo/ayj0ze/AekpehUhw8D5Wm/Q5Zw+LaKHJIzHVRo+fZkTXTPqV60u3OtVFAPj1iDhpj411jzOPiNbKYFO6NAvLoEXPQCL577RrPGr1FFjCZnEUWRgHd/uHEJAdD86zR4zGehKZsjKKmS+FuTRkqxcFjyJ9LgjM7o/B4rrwXzz1is0zvGmPjUWMu8yyyM4qU5DAMyUFjYfrI2ii8h7tlnoVfvCU4Y5YTuyzI3ChKrx2HXIYhXjXG2n8eNdVkbRRQeu1YeBy/9+JZo0dt5Z6ZPZReOx7e92OhHQYahaSrJR2RtKex7ARJOyXdGf4fH5ZL0hWS9kvaLenMlOKb5PDDKUOleOSgcZoYJqK4Bji3Z9llwM1mtg64ObwHOA9YF/62AlfGkTk83ucclHtmxiEXQ/NC8ntmmtlXgPt6Fm8CtofX24ELGss/bhW3AscpPId0Kebn54dXPICu5iy8a6zvmRmTYhbD09Y9M082s8NBwGHgpLB8NXBPY72DYdlRSNoqaU7S3Nq1a0eU0Z9ckoc5aIyNZ43FdBYndjKz357ueyTNbJuZzZrZ7MzMTGQZeSQPc9AIfk9LxsaztrYZ1SjurYcU4f+RsPwgsKax3inAoeUUnEOj7NpYOweN4DMi8DyJajmMahQ7gC3h9Rbgxsby14azHxuBB+ohyrB0Ldyty8thGJKDxlh4vFirzYhn4NPMJX0KeAlwoqSDwLuB9wPXS7oYOABcFFa/CTgf2A/8DHjdqMKkeE9irg+W59AytsaUQyXPGiFO20kxS9Jz+xvEQKMws1ct8tE5fdY14JJxRYWyYhTzmPJiH6zY5eWiMTaeNcbS5lHTcpiKmZnDkkPyMAeN0J08jccooA1NWRhFDo0yB40x6aKhxcKjpkFkYRQ5JA9z0Oh9m8G3Ro8JzkmZThZGUVN67fHL845njR7zKJPaX1kZRS4/nBwMzXu0kgKPGnOZZ5GVUUD3wt26vC5q9LzNHochKaOL7IyiJodeOyZd1Og54ktxWtwz2RpFV3vEHDR6x6NGj5qaZGsUKcihR8xBY2H6mAqjyOGH07WhUmG6mAqjKAnOOJSzK+ORyxmMUZgKo6jpWq+dwzAkB41QEpyDmCqj6GKvnYvG2OSgcVw8aZoqowD/P8RUcyK8a1S5uW6rjLuvps4oahf23Chz0hizPO8ap9l4xt1XbozCeyjZ1dmRsfEUTvfiWVvbuDGKHK5pyOWH6F1jIT/cGAWUrHsscjDdFOXFZJpPdY6CK6NIRQ4hZQ7DkBw0xsLjJeVt0gmjqMmhR8whsoiJd40etbWhybVRdLFHzEGj922GuJeBx8LjpenD4toovPc20E2NXczTdH3m5kCjkHS1pCOS9jSWvUfSDyXtCn/nNz57h6T9ku6Q9PJxBebSKFPg+YeTAs+G5tV4PN0z8xrg3D7LLzez9eHvJgBJpwObgeeG7/yjpBUxhHYp3K3p6jDEq0aPd6OalFkMNAoz+wpw35DlbQKuNbOHzewuqieGnTWGvqaOGMUcRem1xy8vNp41ejzGk9A0To7iUkm7w9Dk+LBsNXBPY52DYdlRSNoqaU7S3MLCwtCVll57fHLRGBtv0QD4jHb6MapRXAk8E1gPHAY+FJb3U9p3r5rZNjObNbPZmZmZoSvO4Zy05x6xWaZ3jbHxqDGX+RojGYWZ3Wtmj5rZL4GP8uvhxUFgTWPVU4BD40mcHDkMQ3LQWJg+RjIKSasab18J1GdEdgCbJa2UdBqwDrh9PIlL6ohaXg7DkBw05mA+3s5gxCwndlkwxNPMJX0KeAlwoqSDwLuBl0haTzWsuBt4PYCZ7ZV0PfBt4BHgEjN7NKriBnUjT3G5ceystMewtya2xpSJZ48aY+0/j5pq5KEBz87O2tzcXNsyHoP3HzfE1+jddFOUF5OoP8w0x2LezGZH+a7rmZnLwXuID/415jJU8kouiclRmBqj6PI8ixw0xiYHjdPE1BhFjfc5B6
l6be8aVe6Z2Srj7isXOQpJ1pVxbE3RGIccNHoh+xzFhg0bopaXQ8PJRaP3XturRo+axsGFUaQih4PlXaPXH2ITj6brUdM4uDOKHKYVd22snYNG8Gm6nidRLQd3RlFOS8bBe8IUfB8XjxdrtRmluDOKmtJrj19ebHLQCOVuVClwaxRdapQ1uWj03ug9avR4h6zl4NYooJvDEPCvsWvDkFjkPHPTtVHU5NAj5qAxJl2M+GLhUdMgsjCKHBplDhohD0PzrNFb/gMmYzxZGAX4DCV76aLGXC4k83jmwaOmxcjGKGpyCNu8j99jl9nFoVIMPEY7i5GdUZTkYbwyY5fn3SC9RgM5RBbZGUVN6RH90aU8TdfmWWRrFF1qlDW5aPTe6D2G/B41NcnWKKCbycMUlGGIj7I8aqrJ2igKfulixDfNFKNYhBwaUA69dg4aY+F56DAuU2UUZaw9fnmxyUEjlATnIAYahaQ1km6RtE/SXklvCstPkLRT0p3h//FhuSRdIWm/qmeTnpl6I2pSNUrPvWIuGhX5npmQR6J4HDxFO8NEFI8AbzGz5wAbgUsknQ5cBtxsZuuAm8N7gPOonhC2DthK9ZzSQqGQMQONwswOm9nXw+sHgX1UTyjfBGwPq20HLgivNwEft4pbgeP02EcQJiVFD5tDpOI9xDcz9xo9hfqxGXdfLStHIelU4AzgNuBkMzscRBwGTgqrrQbuaXztYFjWW9ZWSXOS5hYWFpavfAApGnpsctAYG88Xknmfy9AmQxuFpCcBnwHebGY/XWrVPsuOasFmts3MZs1sdmZmZlgZhSnA85R0r1Oq2zadoYxC0jFUJvFJM/tsWHxvPaQI/4+E5QeBNY2vnwIciiN3NNreycPgOdGXokxvWf0mHiOLtqPFYc56CLgK2GdmH258tAPYEl5vAW5sLH9tOPuxEXigHqK0Rds7eRhymB2Zg8ZYeJ4l2QaPG2KdFwGvAb4laVdY9k7g/cD1ki4GDgAXhc9uAs4H9gM/A14XVfEYSP6frB2zzPqH6LmhetfoUVsbmgYahZl9lf55B4Bz+qxvwCVj6kpC7EaZcoKSZ43QHUMrmircz8z0Hj5D0RirPM95Go9DpElqcm8Unqfs1uSi0WNjb+J5noXXBOekjql7o6jx3iOCf43ee23wrdHjqdNJmUU2RlF67Th47rWbZXrV6PEYT0JTNkaRCs+NskkXNXr7QdZ4HYakpPNGAT4z2710UWMZhiyfVOZajKLgGs8Rn0fzTqWpGEUhCV0ZhnSF7I3C+zUNKcpLQRfP2MTEo7aYmrI3iq5l3Zvk0GvnoDEG057gzN4oUuFx/NlLDvezyCGy8JiU9Nb+ilEMwGujTFmm1167JpeIb5qYOqPwPlMw1eXf3jXGvrmu52GIR8bdV/LgppLM++XfsSka45CDRi9Imjez2VG+6yKi2LBhQ9Tycmg4uWj03mt71ehR0zi4MIqCX3K5eC5WmR5nbnrAnVHksHO7ljwE/9eaxPpher6ArM124s4ocgnJPZeXglw0ejPdaTll6s4oakqv7RPvGj2aBfjfb4NwaxS5nCvvokbvjd6jxtyHIW6NAnwe8F66qLGLhjbt+Y9BuDYK8D1ltyYXjbHxrnHa7xkxyZzFMA8AWiPpFkn7JO2V9Kaw/D2SfihpV/g7v/Gdd0jaL+kOSS8fV2QOPWIOGsF3r13jWaO3yAImMwwZ5gFAjwBvMbOvS3oyMC9pZ/jscjP7YHNlSacDm4HnAk8D/kPSs8zs0XGE5pJ1906qXttzdj+mRo8ziCfR7gZGFGZ22My+Hl4/COyjz9PJG2wCrjWzh83sLqonhp0VQyzkkT32PgyJXWYX8zQx8BjtLMaychSSTgXOAG4Liy6VtFvS1ZKOD8tWA/c0vnaQPsYiaaukOUlzCwsLQ2voaq8dmxw0gm9D8zYMSWmGQxuFpCdRPdH8zWb2U+BK4JnAeuAw8KF61T5fP2pvmNk2M5s1s9mZmZllC8+h14
5NFzWWBOfyaPWemZKOoTKJT5rZZ4Oge83sUTP7JfBRfj28OAisaXz9FOBQPMkVXU0epsBzr50Cr5GFN01NhjnrIeAqYJ+ZfbixfFVjtVcCe8LrHcBmSSslnQasA26PJ/nXdHEYkoJchiEx8RhZeE4ID3PW40XAa4BvSdoVlr0TeJWk9VTDiruB1weBeyVdD3yb6ozJJeOe8Sjkifd7RaQ4YzOtDDQKM/sq/fMONy3xnfcB7xtDV2EKSDlUitmLF7MYjPuZmcuhJDh90qUEZ0w8aZoqo2g2oNinrrzOZUitMQa5aPSmzVOUM1VG0SSHBF0OGmPjedq39zMPbTK1RlHwSwqziJ2ziFVWLNo2nU4YRds7eRhyyK90Zb6Fx8ii7WixE0bR9k4ehhyGITlojEWJLB5LJ4yipmu9doryUuBVo8eopy1NnTKKHHrEHDTGxrNGr2YxadwbhbeD1I+iMQ5eoymPZgGTPabujSKXC6FS0JXkYY3nC/28JjgndUzdG0WN19NpTbxrzOXenl41ekxwTsossjGK0mvHwXOv3SzTq0aPx3gSmrIxilR4bpRNuqjR2w+yxuswJCWdNwrwnXWv6aLGMgxZPq3fCq9QaAPPp4s9Rj2pNGVtFN4OUj+6qLFL2xzzh+lRU03WRuH5SsQaj71OLyVPM35Z3vIMsY9p1kYB3cu6N8nB0HLQGINpT3BmbxSp6GryMDY5aIzFNCc4i1EUChHxGPXEMJ1iFIUl8dbo++FNo0ezGJdiFIUlyWWqexfOPLTJMA8Aeryk2yV9U9JeSe8Ny0+TdJukOyVdJ+nYsHxleL8/fH7qcgTlsHPL6cTx8JyA9nzmoc12MkxE8TBwtpk9n+o5o+dK2gh8ALjczNYBPwEuDutfDPzEzH4buDysNzS59GCxy/NuPrlojIXHyKLNRO5Ao7CKh8LbY8KfAWcDnw7LtwMXhNebwnvC5+dohD3VldNqNTmcks1BI/g7tZhD+xvEsA8pXqHqcYJHgJ3A94D7zeyRsMpBYHV4vRq4ByB8/gDw1D5lbpU0J2luYWHhqDq71ChrctHovdF71Jj7MGQoo7DqqeXrqZ5MfhbwnH6rhf/9tuCoX4CZbTOzWTObnZmZWazeYeS1Shc1dnGbY5DzMGRZZz3M7H7gy8BG4DhJ9bNLTwEOhdcHgTUA4fOnAPeNI9L72DhFmd56xH4UjcvHY7QzDMOc9ZiRdFx4/QTgpcA+4BbgwrDaFuDG8HpHeE/4/Es2pv3lkDzMQaP3bQbfGj0mOCdlOgOfZg6sArZLWkFlLNeb2eckfRu4VtLfAN8ArgrrXwV8QtJ+qkhicwLdY+MxNO0llyGDFPfp4imIoTFFgtOTpqUYaBRmths4o8/y71PlK3qX/xy4KIq6PsRslKmIrTHFNsf+cXs/Lh41xtaUcvuym5nZtXC3Lq+LGj1vs8dhSEoTzM4oasopP3906ZR21+ZZZGsUXWqUNblo9N7oPc5p8KipSbZGAd1MSKYgl6RpTGJHBLHK8ToMydooCoVpw2tkUYxiEbyHz5DHJK8cNMZimhOcU2UUZaw9fnmxyUEjlATnIKbKKMpY2ydF42h4MoupMopCYdrwYhbFKAqt4KHxL4Y3bR7Mwp1RtL1DhqFLCboaz7Mka7wlAMFngnMU3BmFx7FiLyUX4pMU18OMy7QkON0ZRUpyiARy0Bgbrxrb7sX70ZYm10bhPdwtF2vFoQxDlkcbZuHaKDw6ei9d1Oh5LkONxxmOOQ9DXBsF5NMoU+D5h5MCz4bm1XgmdUzdG0VNl8Ldmq4OQ7xq9Di1elJmkY1RlF47Dp577WaZXjV6PMaT0JSNUdSUXnt8ctEYG2/RAPiMdvqRnVF08Xx+ql7bc3k1niO+LiU4szOKQrfwbGhdGoZkbRTeDlI/uqixS9sc84fpUVPNMA8Aeryk2yV9U9JeSe8Ny6+RdJekXeFvfVguSVdI2i9pt6QzoypukMo9PYe7KfCcPGziVWOs/e
dRU80wDwB6GDjbzB6SdAzwVUlfCJ/9pZl9umf984B14e8FwJXhfxK6PH6PSdE4HrETnDHKq80iRlkDIwqreCi8PSb8LVXzJuDj4Xu3Uj2jdNXYSguFDJjWyGKoHIWkFZJ2AUeAnWZ2W/jofWF4cbmklWHZauCextcPhmW9ZW6VNCdpbmFhYYxNOKrcaGWlKrOLGiW51xgDj1FPDLMYZuiBmT0KrFf1sOIbJD0PeAfw38CxwDbg7cBfA/0UHbX3zGxb+B6SFiT9D/CjUTail2XulBOHqTdRIx+q7mWWOSwD605oaNG2ewSNUfd5ZnU/fdQvD2UUNWZ2v6QvA+ea2QfD4oclfQx4a3h/EFjT+NopwKEB5c5ImjOz2eXoiUFb9Za6S90t1H3qqN8f5qzHTIgkkPQE4KXAd+q8gypLvwDYE76yA3htOPuxEXjAzA6PKrBQKLTPMBHFKmC7pBVUxnK9mX1O0pckzVANNXYBfx7Wvwk4H9gP/Ax4XXzZhUJhkgw0CjPbDZzRZ/nZi6xvwCUjaNk2wndi0Fa9pe5SdzZ1y2OWtlAo+CLrKdyFQmEyFKMoFAoDKUZRKBQGUoyiUCgMpBhFoVAYSDGKQqEwkP8Hz0hr92hd/2kAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "for alias in (distance, hamming, jaro, ratio):\n", " figure();spy(df[alias.__name__])\n", " gca().set_title(alias.__name__)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }