{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Reading programming pearls. How do you compute fast anagrams?" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def signature(word):\n", "    \"\"\"Return the anagram signature of `word`: its characters in sorted order.\n", "\n", "    Two words are anagrams of each other exactly when their signatures are equal.\n", "    \"\"\"\n", "    letters = sorted(word)\n", "    return \"\".join(letters)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'deiopst'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "signature(\"deposit\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'deiopst'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "signature(\"dopiest\")" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'deiopst'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "signature(\"posited\")" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'deiopst'" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "signature(\"topside\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's tackle the problem that is stated in the programming pearls column: let's get a big file full of words and find all anagram groups inside it." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We'll download the big file of words from Peter Norvig's website."
] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import requests" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# A timeout keeps the cell from hanging forever; raise_for_status() stops us from parsing an error page as words.\n", "r = requests.get('http://norvig.com/ngrams/count_1w.txt', timeout=30)\n", "r.raise_for_status()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "r" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "4956241" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(r.text)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'the\\t23135851162\\nof\\t13151942776\\nand\\t12997637966\\nto\\t12136980858\\na\\t9081174698\\nin\\t8469404971\\nfor\\t5933321'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "r.text[:100]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Each line is 'word<TAB>count'; keep only the word.\n", "# Skip blank lines (e.g. the empty string split('\\n') leaves after a trailing newline) so '' is not treated as a word.\n", "words = [line.split('\\t')[0] for line in r.text.split('\\n') if line]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "333334" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(words)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['the', 'of', 'and', 'to', 'a', 'in', 'for', 'is', 'on', 'that']" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "words[:10]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [], "source": [ "signatures = 
[signature(word) for word in words]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now, let's build a dictionary using the signatures:" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Map each signature to the list of words sharing it, i.e. one list per anagram group.\n", "anagrams = {}\n", "for word in words:\n", "    # setdefault creates the list the first time a signature is seen.\n", "    anagrams.setdefault(signature(word), []).append(word)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['aerst',\n", " 'acrs',\n", " 'abcs',\n", " 'acis',\n", " 'acms',\n", " 'acps',\n", " 'acst',\n", " 'aips',\n", " 'cips',\n", " 'aces',\n", " 'ahins',\n", " 'aeprs',\n", " 'apst',\n", " 'adir',\n", " 'adnor',\n", " 'ainst',\n", " 'acir',\n", " 'aeps',\n", " 'aims',\n", " 'aist',\n", " 'acip',\n", " 'acds',\n", " 'cops',\n", " 'airst',\n", " 'ails',\n", " 'ains',\n", " 'acmr',\n", " 'arst',\n", " 'aorst',\n", " 'gglloo',\n", " 'eorst',\n", " 'ceis',\n", " 'einst',\n", " 'aest',\n", " 'aeimnr',\n", " 'amps',\n", " 'acmp',\n", " 'eist',\n", " 'cers',\n", " 'adis',\n", " 'amst',\n", " 'aens',\n", " 'aelst',\n", " 'acls',\n", " 'eimst',\n", " 'cims',\n", " 'acpr',\n", " 'aems',\n", " 'cmps',\n", " 'ceps',\n", " 'imst',\n", " 'acert',\n", " 'acfs',\n", " 'cest',\n", " 'acpt',\n", " 'aemns',\n", " 'abis',\n", " 'eims',\n", " 'acdp',\n", " 'acim',\n", " 'adenr',\n", " 'acep',\n", " 'acehrs',\n", " 'eops',\n", " 'acns',\n", " 'cios',\n", " 'aers',\n", " 'eips',\n", " 'aels',\n", " 'airs',\n", " 'aenrt',\n", " 'aenrs',\n", " 'adem',\n", " 'acrt',\n", " 'cdis',\n", " 'aelps',\n", " 'aegnr',\n", " 'alps',\n", " 'prst',\n", " 'cems',\n", " 'eirst',\n", " 'aeiln',\n", " 'amsu',\n", " 'eggloo',\n", " 'acenr',\n", " 'acin',\n", " 'imps',\n", " 'afis',\n", " 'aeirs',\n", " 'acdi',\n", " 'adin',\n", " 'ades',\n", " 'airt',\n", " 'dein',\n", " 'cprs',\n", " 'acil',\n", " 'enort',\n", " 'aert',\n", " 'aemnor',\n", " 'cpst',\n", " 'aelm',\n",
" 'aipr',\n", " 'acet',\n", " 'cdps',\n", " 'acel',\n", " 'afir',\n", " 'eirt',\n", " 'inos',\n", " 'aprt',\n", " 'eirs',\n", " 'astu',\n", " 'egilo',\n", " 'achs',\n", " 'eerst',\n", " 'cdip',\n", " 'agnor',\n", " 'acfi',\n", " 'eegnr',\n", " 'aarst',\n", " 'deir']" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sorted(anagrams.keys(), key=lambda k: len(anagrams[k]), reverse=True)[:120]" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['radon',\n", " 'doran',\n", " 'adorn',\n", " 'ronda',\n", " 'norad',\n", " 'andro',\n", " 'daron',\n", " 'andor',\n", " 'rodan',\n", " 'rando',\n", " 'nardo',\n", " 'dorna',\n", " 'drano',\n", " 'narod',\n", " 'nador',\n", " 'donar',\n", " 'ondra',\n", " 'adron',\n", " 'ardon',\n", " 'drona',\n", " 'arond']" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "anagrams['adnor']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Finally, let's do a little scatter plot of lenght of word vs number of anagrams. Let's use pandas for that." 
] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# One entry per signature; each value is the list of words in that anagram group.\n", "df = pd.Series(anagrams)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(243439,)" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = pd.DataFrame(pd.Series(anagrams),\n", " columns=['words'])" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df['signature_length'] = [len(item) for item in df.index]" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df['word_count'] = [len(item) for item in df.words]" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordssignature_lengthword_count
[]01
a[a]11
aa[aa]21
aaa[aaa]31
aaaa[aaaa]41
aaaaaaaaaahhhhhhhhhh[hahahahahahahahahaha]201
aaaaaaaabbllmm[alabamaalabama]141
aaaaaaaaddeeeeggggillmnnoprtuuuu[grenadaguadeloupeguatemalaguiana]321
aaaaaaaahhhhhhhh[hahahahahahahaha]161
aaaaaaahhhhhhh[hahahahahahaha]141
aaaaaaahnnnnnrty[ananthanarayanan]161
aaaaaaaknnnnrrsy[sankaranarayanan]161
aaaaaabbbehiimmrswzz[saharazambiazimbabwe]201
aaaaaabddeemsttt[databasemetadata]161
aaaaaaccddnn[canadacanada]121
aaaaaacceeefgilllmnnorrssstttyy[acetylgalactosaminyltransferase]311
aaaaaacefghiillmrrssty[facialsmargaritaashley]221
aaaaaagmnrsy[arasanayagam]121
aaaaaahhhhhh[hahahahahaha]121
aaaaaakkllss[alaskaalaska]121
aaaaaannnrstyy[satyanarayanan]141
aaaaaannrstyy[satyanarayana]131
aaaaabbcdrr[abracadabra]111
aaaaabbceiklnrs[caribbeanalaska]151
aaaaabbilmmnrsu[balasubramaniam]151
aaaaabbilmnnrsu[balasubramanian]151
aaaaabblmmnrsuy[balasubramanyam]151
aaaaabbmt[bambaataa]91
aaaaabccceeegghhiiilnoorrrtuvzz[herzegovinabulgariacroatiaczech]311
aaaaabcdddeeeglmnnnrrstu[bermudacanadagreenlandst]241
............
wxy[wyx, wxy]32
wxyz[wxyz]41
wxz[zxw]31
wy[wy, yw]22
wz[wz, zw]22
wzz[zzw]31
x[x]11
xx[xx]21
xxx[xxx]31
xxxx[xxxx]41
xxy[xxy, xyx]32
xxz[zxx, xxz]32
xy[xy, yx]22
xyy[xyy, yxy]32
xyyzz[xyzzy]51
xyz[xyz, zyx, zxy]33
xz[zx, xz]22
xzz[zzx, zxz]32
y[y]11
yy[yy]21
yyy[yyy]31
yyyy[yyyy]41
yyz[yyz]31
yyzz[zzyy]41
yz[yz, zy]22
yzz[zzy, zyz, yzz]33
z[z]11
zz[zz]21
zzz[zzz]31
zzzz[zzzz]41
\n", "

243439 rows × 3 columns

\n", "
" ], "text/plain": [ " words \\\n", " [] \n", "a [a] \n", "aa [aa] \n", "aaa [aaa] \n", "aaaa [aaaa] \n", "aaaaaaaaaahhhhhhhhhh [hahahahahahahahahaha] \n", "aaaaaaaabbllmm [alabamaalabama] \n", "aaaaaaaaddeeeeggggillmnnoprtuuuu [grenadaguadeloupeguatemalaguiana] \n", "aaaaaaaahhhhhhhh [hahahahahahahaha] \n", "aaaaaaahhhhhhh [hahahahahahaha] \n", "aaaaaaahnnnnnrty [ananthanarayanan] \n", "aaaaaaaknnnnrrsy [sankaranarayanan] \n", "aaaaaabbbehiimmrswzz [saharazambiazimbabwe] \n", "aaaaaabddeemsttt [databasemetadata] \n", "aaaaaaccddnn [canadacanada] \n", "aaaaaacceeefgilllmnnorrssstttyy [acetylgalactosaminyltransferase] \n", "aaaaaacefghiillmrrssty [facialsmargaritaashley] \n", "aaaaaagmnrsy [arasanayagam] \n", "aaaaaahhhhhh [hahahahahaha] \n", "aaaaaakkllss [alaskaalaska] \n", "aaaaaannnrstyy [satyanarayanan] \n", "aaaaaannrstyy [satyanarayana] \n", "aaaaabbcdrr [abracadabra] \n", "aaaaabbceiklnrs [caribbeanalaska] \n", "aaaaabbilmmnrsu [balasubramaniam] \n", "aaaaabbilmnnrsu [balasubramanian] \n", "aaaaabblmmnrsuy [balasubramanyam] \n", "aaaaabbmt [bambaataa] \n", "aaaaabccceeegghhiiilnoorrrtuvzz [herzegovinabulgariacroatiaczech] \n", "aaaaabcdddeeeglmnnnrrstu [bermudacanadagreenlandst] \n", "... ... 
\n", "wxy [wyx, wxy] \n", "wxyz [wxyz] \n", "wxz [zxw] \n", "wy [wy, yw] \n", "wz [wz, zw] \n", "wzz [zzw] \n", "x [x] \n", "xx [xx] \n", "xxx [xxx] \n", "xxxx [xxxx] \n", "xxy [xxy, xyx] \n", "xxz [zxx, xxz] \n", "xy [xy, yx] \n", "xyy [xyy, yxy] \n", "xyyzz [xyzzy] \n", "xyz [xyz, zyx, zxy] \n", "xz [zx, xz] \n", "xzz [zzx, zxz] \n", "y [y] \n", "yy [yy] \n", "yyy [yyy] \n", "yyyy [yyyy] \n", "yyz [yyz] \n", "yyzz [zzyy] \n", "yz [yz, zy] \n", "yzz [zzy, zyz, yzz] \n", "z [z] \n", "zz [zz] \n", "zzz [zzz] \n", "zzzz [zzzz] \n", "\n", " signature_length word_count \n", " 0 1 \n", "a 1 1 \n", "aa 2 1 \n", "aaa 3 1 \n", "aaaa 4 1 \n", "aaaaaaaaaahhhhhhhhhh 20 1 \n", "aaaaaaaabbllmm 14 1 \n", "aaaaaaaaddeeeeggggillmnnoprtuuuu 32 1 \n", "aaaaaaaahhhhhhhh 16 1 \n", "aaaaaaahhhhhhh 14 1 \n", "aaaaaaahnnnnnrty 16 1 \n", "aaaaaaaknnnnrrsy 16 1 \n", "aaaaaabbbehiimmrswzz 20 1 \n", "aaaaaabddeemsttt 16 1 \n", "aaaaaaccddnn 12 1 \n", "aaaaaacceeefgilllmnnorrssstttyy 31 1 \n", "aaaaaacefghiillmrrssty 22 1 \n", "aaaaaagmnrsy 12 1 \n", "aaaaaahhhhhh 12 1 \n", "aaaaaakkllss 12 1 \n", "aaaaaannnrstyy 14 1 \n", "aaaaaannrstyy 13 1 \n", "aaaaabbcdrr 11 1 \n", "aaaaabbceiklnrs 15 1 \n", "aaaaabbilmmnrsu 15 1 \n", "aaaaabbilmnnrsu 15 1 \n", "aaaaabblmmnrsuy 15 1 \n", "aaaaabbmt 9 1 \n", "aaaaabccceeegghhiiilnoorrrtuvzz 31 1 \n", "aaaaabcdddeeeglmnnnrrstu 24 1 \n", "... ... ... 
\n", "wxy 3 2 \n", "wxyz 4 1 \n", "wxz 3 1 \n", "wy 2 2 \n", "wz 2 2 \n", "wzz 3 1 \n", "x 1 1 \n", "xx 2 1 \n", "xxx 3 1 \n", "xxxx 4 1 \n", "xxy 3 2 \n", "xxz 3 2 \n", "xy 2 2 \n", "xyy 3 2 \n", "xyyzz 5 1 \n", "xyz 3 3 \n", "xz 2 2 \n", "xzz 3 2 \n", "y 1 1 \n", "yy 2 1 \n", "yyy 3 1 \n", "yyyy 4 1 \n", "yyz 3 1 \n", "yyzz 4 1 \n", "yz 2 2 \n", "yzz 3 3 \n", "z 1 1 \n", "zz 2 1 \n", "zzz 3 1 \n", "zzzz 4 1 \n", "\n", "[243439 rows x 3 columns]" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZwAAAEACAYAAACH5cABAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGV9JREFUeJzt3X2QZXV95/H3RwggCgRMMVMZQDAyBIxPg47ZENeOLE9a\nArtbkjFZwUhZKSGBjVuWDLuVGSq1G6EqEa0UVBJZGVjNLJAoWFIwUNix3ALBBxYUhKlyQWaQ1oCM\n8aGQh+/+cX8Nl2Eeunv6nnN7+v2qujXn/u455/e9Z6b7M79zfveeVBWSJI3ay/ouQJK0OBg4kqRO\nGDiSpE4YOJKkThg4kqROGDiSpE6MNHCSXJFkKsk9Q22XJLk/yd1J/jHJ/kOvrU6ysb1+4lD7iiT3\nJHkwyaVD7XslWd+2uT3JYUOvndXWfyDJmaN8n5KknRv1COczwElbtW0AXldVbwI2AqsBkhwDnAEc\nDZwCXJYkbZvLgbOrajmwPMn0Ps8GnqiqI4FLgUvavg4E/hx4K/A2YE2SA0bzFiVJMzHSwKmqrwI/\n3qrt1qp6rj29AzikLZ8KrK+qZ6rqIQZhtDLJUmC/qrqrrXcVcHpbPg1Y15avA97Zlk8CNlTVlqp6\nkkHInTyvb06SNCt9X8P5IHBjW14GPDL02ubWtgzYNNS+qbW9aJuqehbYkuSgHexLktST3gInyX8F\nnq6qf5jP3c7jviRJ82jPPjpN8gHgXbxwCgwGo5BDh54f0tq21z68zaNJ9gD2r6onkmwGJrba5svb\nqcUvk5OkOaiqWf0nv4sRThgaeSQ5GfgocGpVPTW03g3Aqjbz7AjgtcCdVfUYg1NlK9skgjOB64e2\nOastvxe4rS3fDJyQ5IA2geCE1rZNVTX2jzVr1vReg3Vap3Va4/RjLkY6wknyOQYjjVcl+T6wBrgQ\n2Au4pU1Cu6Oqzqmq+5JcA9wHPA2cUy+8q3OBK4F9gBur6qbWfgVwdZKNwOPAKoCq+nGSvwC+DhRw\nUQ0mD0iSejLSwKmqP9hG82d2sP5fAn+5jfZvAK/fRvtTDKZSb2tfVzIIKUnSGOh7lp
pmaGJiou8S\nZsQ655d1zq+FUOdCqHGuMtdzcbuLJLXYj4EkzVYSagwnDUiSZOBIkrph4EiSOmHgSJI6YeAsckuX\nHk6S3h5Llx7e9yGQ1BFnqS3yWWqDD9/2+f4z508tS+qPs9QkSWPLwJEkdcLAkSR1wsCRJHXCwJEk\ndcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXC\nwJEkdcLAkSR1wsCRJHXCwJEkdWKkgZPkiiRTSe4ZajswyYYkDyS5OckBQ6+tTrIxyf1JThxqX5Hk\nniQPJrl0qH2vJOvbNrcnOWzotbPa+g8kOXOU71OStHOjHuF8Bjhpq7YLgFur6ijgNmA1QJJjgDOA\no4FTgMuSpG1zOXB2VS0HlieZ3ufZwBNVdSRwKXBJ29eBwJ8DbwXeBqwZDjZJUvdGGjhV9VXgx1s1\nnwasa8vrgNPb8qnA+qp6pqoeAjYCK5MsBfarqrvaelcNbTO8r+uAd7blk4ANVbWlqp4ENgAnz9sb\nkyTNWh/XcA6uqimAqnoMOLi1LwMeGVpvc2tbBmwaat/U2l60TVU9C2xJctAO9iVJ6sk4TBqoedxX\ndr6KJKkPe/bQ51SSJVU11U6X/bC1bwYOHVrvkNa2vfbhbR5Nsgewf1U9kWQzMLHVNl/eXkFr1659\nfnliYoKJiYntrSpJi9Lk5CSTk5O7tI9UzecAYxsdJIcDX6yq17fnFzO40H9xko8BB1bVBW3SwGcZ\nXORfBtwCHFlVleQO4DzgLuBLwKeq6qYk5wC/VVXnJFkFnF5Vq9qkga8DKxiM4r4OHNuu52xdX436\nGOzI0qWHMzX1cG/9D/T3/iH0efwlzU0SqmpWZ5VGOsJJ8jkGI41XJfk+sAb4OHBtkg8CDzOYmUZV\n3ZfkGuA+4GngnKEkOBe4EtgHuLGqbmrtVwBXJ9kIPA6savv6cZK/YBA0BVy0rbAZB4Ow6fcXviR1\nYeQjnHHX9whnMPO778BxhCNpduYywhmHSQOSpEXAwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCR\nJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1\nwsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdaK3\nwEnyZ0m+neSeJJ9NsleSA5NsSPJAkpuTHDC0/uokG5Pcn+TEofYVbR8PJrl0qH2vJOvbNrcnOazr\n9yhJekEvgZPk14E/BVZU1RuAPYH3ARcAt1bVUcBtwOq2/jHAGcDRwCnAZUnSdnc5cHZVLQeWJzmp\ntZ8NPFFVRwKXApd08uYkSdvU5ym1PYBXJNkTeDmwGTgNWNdeXwec3pZPBdZX1TNV9RCwEViZZCmw\nX1Xd1da7amib4X1dBxw/wvciSdqJXgKnqh4F/gr4PoOg2VJVtwJLqmqqrfMYcHDbZBnwyNAuNre2\nZcCmofZNre1F21TVs8CTSQ4ayRuSJO3Unn10muRXGYxAXg1sAa5N8odAbbXq1s93qdvtvbB27drn\nlycmJpiYmJjHbiVp4ZucnGRycnKX9tFL4AD/DvheVT0BkOTzwO8AU0mWVNVUO132w7b+ZuDQoe0P\naW3bax/e5tEkewD7T/e3teHAkSS91Nb/Gb/oootmvY++ruF8H/jtJPu0i//HA/cBNwAfaOucBVzf\nlm8AVrWZZ0cArwXubKfdtiRZ2fZz5lbbnNWW38tgEoIkqSe9jHCq6s4k1wHfAp5uf/4dsB9wTZIP\nAg8zmJlGVd2X5BoGofQ0cE5VTZ9uOxe4EtgHuLGqbmrtVwBXJ9kIPA6s6uK9SZK2LS/83l6cklSf\nx2AwMOvz76D//hf7v0FpIUpCVW332vi2+E0Dkq
ROGDiSpE4YOJKkThg4kqROGDiSpE4YOJKkThg4\nkqROGDiSpE7MKHCSvH7UhUiSdm8zHeFcluTOJOcM34VTkqSZmlHgVNXbgT9k8O3L30jyuSQnjLQy\nSdJuZVbfpda+5v904FPATxh8EdeFVfVPoylv9Pwutf7797vUpIVnZN+lluQNST4B3A+8E3hPVR3d\nlj8x60olSYvOjEY4Sf4Z+DRwXVX9YqvX3l9VV4+ovpFzhNN//45wpIVnLiOcmQbOK4FfVNWz7fnL\ngH2q6udzqnSMGDj992/gSAvPKG9PcCvw8qHn+7Y2SZJmZKaBs09V/XT6SVvedzQlSZJ2RzMNnJ8l\nWTH9JMmxwC92sL4kSS+y5wzX+8/AtUkeZXDSfynw+yOrSpK025nx53CS/ApwVHv6QFU9PbKqOuSk\ngf77d9KAtPCMbJZa2/nvAIczNCqqqqtm09k4MnD679/AkRaeuQTOjE6pJbka+A3gbuDZ1lzAgg8c\nSVI3ZnoN5y3AMb0OBSRJC9pMZ6l9m8FEAUmS5mSmI5xfA+5Lcifw1HRjVZ06kqokSbudmQbO2lEW\nIUna/c1mltqrgSOr6tYk+wJ7VNW/jrS6DjhLrf/+vTQoLTyjvD3Bh4DrgL9tTcuAL8yuPEnSYjbT\nSQPnAscxuOkaVbUROHhXOk5yQJJrk9yf5DtJ3pbkwCQbkjyQ5Obh21knWZ1kY1v/xKH2FUnuSfJg\nkkuH2vdKsr5tc3uSw3alXknSrplp4DxVVb+cfpJkT3b9PMwngRvbjdzeCHwXuAC4taqOAm4DVrf+\njgHOAI4GTgEuy+BcFMDlwNlVtRxYnuSk1n428ERVHQlcClyyi/VKknbBTAPnn5NcCLw8yQnAtcAX\n59ppkv2Bt1fVZwCq6pmq2gKcBqxrq61jcDtrgFOB9W29h4CNwMokS4H9ququtt5VQ9sM7+s64Pi5\n1itJ2nUzDZwLgB8B9wJ/DNwI/Ldd6PcI4F+SfCbJN5P8XZuIsKSqpgCq6jFeOG23DHhkaPvNrW0Z\nsGmofVNre9E27cZxTyY5aBdqliTtghlNi66q54C/b4/56ncFcG5VfT3JJxiE2tan6eZz+tJ2Z1Os\nXbv2+eWJiQkmJibmsVtJWvgmJyeZnJzcpX3M9BbT/49t/PKvqtfMqdNkCXD79PZJfpdB4PwGMFFV\nU+102Zer6ugkFwy6q4vb+jcBa4CHp9dp7auAd1TVh6fXqaqvJdkD+EFVvWSig9Oi++/fadHSwjPK\nW0y/BXhre7wd+BTwv2ZX3gvaabNHkixvTccD3wFuAD7Q2s4Crm/LNwCr2syzI4DXAne2025bkqxs\nkwjO3Gqbs9ryexlMQpAk9WTGH/x8yYbJN6rq2Dl3nLwR+DTwK8D3gD8C9gCuAQ5lMHo5o6qebOuv\nZjDz7Gng/Kra0NqPBa4E9mEw6+381r43cDXwZuBxYFWbcLB1HY5wHOFImqWR3Q9n+PbSDEZFbwE+\nXFVvnF2J48fA6b9/A0daeEZ2Pxzgr4aWnwEeYvC5GEmSZmTOp9R2F45w+u9/sf8blBaiUd7x8yM7\ner2q/no2nUqSFp/Z3PHzrQxmfgG8B7iTwSf+JUnaqZlOGvgK8O7p2xEk2Q/4UlX92xHXN3KeUuu/\nf0+pSQvPKD+HswT45dDzX7Y2SZJmZKan1K4C7kzy+fb8dF74YkxJknZqNnf8XMHgWwYAvlJV3xpZ\nVR3ylFr//XtKTVp4RnlKDWBf4CdV9UlgU/uKGUmSZmSmkwbWMJipdlRVLU/y68C1VXXcqAscNUc4\n/ffvCEdaeEY5wvn3DG6C9jOAqnoU2G925UmSFrOZBs4v2zCgAJK8YnQlSZJ2RzMNnGuS/C3wq0k+\nBNzK/N2MTZK0CMxmltoJwIkMTvrfXFW3jLKwrngNp//+vYYjLTwjuT1Bu1vmrVX1e7tS3LgycPrv\n38CRFp6RTB
qoqmeB55IcMOfKJEmL3ky/aeCnwL1JbqHNVAOoqvNGUpUkabcz08D5p/aQJGlOdngN\nJ8lhVfX9DuvpnNdw+u/fazjSwjOKazhfGNr5P86pKkmS2HngDKfXa0ZZiCRp97azwKntLEuSNCs7\nu4bzLINZaQFeDvx8+iWgqmr/kVc4Yl7D6b9/r+FIC89cruHscJZaVe2xayVJkjQwm/vhSJI0ZwaO\nJKkTBo4kqRMGjiSpE70GTpKXJflmkhva8wOTbEjyQJKbh78wNMnqJBuT3J/kxKH2FUnuSfJgkkuH\n2vdKsr5tc3uSw7p9d5KkYX2PcM4H7ht6fgGDWyEcBdwGrAZIcgxwBnA0cApwWQbziQEuB86uquXA\n8iQntfazgSeq6kjgUuCSUb8ZSdL29RY4SQ4B3gV8eqj5NGBdW14HnN6WTwXWV9UzVfUQsBFYmWQp\nsF9V3dXWu2pom+F9XQccP4r3IUmamT5HOJ8APsqLP3W4pKqmAKrqMeDg1r4MeGRovc2tbRmwaah9\nU2t70Tbtnj5PJjlont+DJGmGZnp7gnmV5N3AVFXdnWRiB6vO50fQt/uJ2LVr1z6/PDExwcTExDx2\nK0kL3+TkJJOTk7u0j53eYnoUkvwP4D8BzzD4ypz9gM8DbwEmqmqqnS77clUdneQCBl+lc3Hb/iZg\nDfDw9DqtfRXwjqr68PQ6VfW1dpvsH1TVwVuV4lfb+NU2kuZgJLeYHoWqurCqDquq1wCrgNuq6v3A\nF4EPtNXOAq5vyzcAq9rMsyOA1wJ3ttNuW5KsbJMIztxqm7Pa8nsZTEKQJPWkl1NqO/Bx4JokH2Qw\nejkDoKruS3INgxltTwPnDA1LzgWuBPYBbqyqm1r7FcDVSTYCjzMINklST3o5pTZOPKXWf/+L/d+g\ntBAtmFNqkqTFx8CRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCR\nJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1\nwsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1opfASXJIktuSfCfJvUnOa+0HJtmQ5IEkNyc5\nYGib1Uk2Jrk/yYlD7SuS3JPkwSSXDrXvlWR92+b2JId1+y41M3uTpLfH0qWH930ApEWjrxHOM8BH\nqup1wL8Bzk3ym8AFwK1VdRRwG7AaIMkxwBnA0cApwGVJ0vZ1OXB2VS0Hlic5qbWfDTxRVUcClwKX\ndPPWNDtPAdXbY2rq4Q7eoyToKXCq6rGqurst/xS4HzgEOA1Y11ZbB5zelk8F1lfVM1X1ELARWJlk\nKbBfVd3V1rtqaJvhfV0HHD+6dyRJ2pner+EkORx4E3AHsKSqpmAQSsDBbbVlwCNDm21ubcuATUPt\nm1rbi7apqmeBJ5McNJI3IUnaqV4DJ8krGYw+zm8jndpqla2f71J387gvSdIs7dlXx0n2ZBA2V1fV\n9a15KsmSqppqp8t+2No3A4cObX5Ia9te+/A2jybZA9i/qp7YVi1r1659fnliYoKJiYldeGeStPuZ\nnJxkcnJyl/aRqvkcRMyi4+Qq4F+q6iNDbRczuNB/cZKPAQdW1QVt0sBngbcxOFV2C3BkVVWSO4Dz\ngLuALwGfqqqbkpwD/FZVnZNkFXB6Va3aRh3V1zFo/TO/A7lZV7Do++/z719aqJJQVbM6c9RL4CQ5\nDvgKcC8vTBm6ELgTuIbByORh4IyqerJts5rBzLOnGZyC29DajwWuBPYBbqyq81v73sDVwJuBx4FV\nbcLB1rUYOIu8fwNHmr0FEzjjxMCx/8X+MyDNxVwCp/dZapKkxcHAkSR1wsCRJHXCwJEkdcLAkSR1\nwsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1
wsCRJHXCwJEkdcLA\nkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdcLAkSR1wsCRJHXCwJEkdWLPvgsYB1u2bOm7BEna\n7aWq+q6hV0lqr73276Xv5557hmee+TnQ599BFn3/i/1nQJqLJFRVZrXNYv9hS1L9/cL7CvAO+v6F\nu9j7X+w/A9JczCVwdvtrOElOTvLdJA8m+Vjf9Wjc7E2SXh5Llx7e95uXOrVbB06SlwF/A5wEvA54\nX5Lf7LequZrsu4AZmuy7gBmabH8+xWCE1f1jaurhnVc5ObnTdcaBdc6fhVDjXO3WgQOsBDZW1cNV\n9TSwHjit55rmaLLvAmZosu8CZmiy7wJmZKH88rHO+bMQapyr3T1wlgGPDD3f1NqkMbDz03kXXXSR\np/S023BaNLD//u/ppd9nn32cn/2sl641FqZP5+3I2vaYf1NT+5DM6prvDl100UWzWn/Jklfz2GMP\nzVv/Gn+79Sy1JL8NrK2qk9vzC4CqqouH1tl9D4AkjZDToock2QN4ADge+AFwJ/C+qrq/18IkaRHa\nrU+pVdWzSf4E2MDgetUVho0k9WO3HuFIksbH7j5LbYcWyodCkzyU5P8m+VaSO/uuZ1qSK5JMJbln\nqO3AJBuSPJDk5iQH9Fljq2lbda5JsinJN9vj5J5rPCTJbUm+k+TeJOe19rE6ntuo809b+7gdz72T\nfK39zNybZE1rH7fjub06x+p4tppe1mq5oT2f9bFctCOc9qHQBxlc33kUuAtYVVXf7bWwbUjyPeDY\nqvpx37UMS/K7wE+Bq6rqDa3tYuDxqrqkhfiBVXXBGNa5BvjXqvrrPmublmQpsLSq7k7ySuAbDD4z\n9keM0fHcQZ2/zxgdT4Ak+1bVz9u13P8DnAf8R8boeO6gzlMYv+P5Z8CxwP5VdepcftYX8whnIX0o\nNIzh31VVfRXYOgRPA9a15XXA6Z0WtQ3bqRMGx3UsVNVjVXV3W/4pcD9wCGN2PLdT5/Rn28bmeAJU\n1c/b4t4MrlcXY3Y8Ybt1whgdzySHAO8CPj3UPOtjOXa/xDq0kD4UWsAtSe5K8qG+i9mJg6tqCga/\nnICDe65nR/4kyd1JPt33qZVhSQ4H3gTcASwZ1+M5VOfXWtNYHc92CuhbwGPALVV1F2N4PLdTJ4zX\n8fwE8FFe/MGxWR/LxRw4C8lxVbWCwf8wzm2niBaKcT1nexnwmqp6E4Mf9LE4ddFOU10HnN9GEFsf\nv7E4ntuoc+yOZ1U9V1VvZjBSXJnkdYzh8dxGnccwRsczybuBqTay3dGoa6fHcjEHzmbgsKHnh7S2\nsVNVP2h//gj4PIPTgeNqKskSeP58/w97rmebqupH9cIFzL8H3tpnPQBJ9mTwS/zqqrq+NY/d8dxW\nneN4PKdV1U8YfHneyYzh8Zw2XOeYHc/jgFPbteR/AN6Z5Grgsdkey8UcOHcBr03y6iR7AauAG3qu\n6SWS7Nv+N0mSVwAnAt/ut6oXCS/+X88NwAfa8lnA9Vtv0JMX1dl+QKb9B8bjmP5P4L6q+uRQ2zge\nz5fUOW7HM8mvTZ+GSvJy4AQG15vG6nhup87vjtPxrKoLq+qwqnoNg9+Tt1XV+4EvMstjuWhnqcFg\nWjTwSV74UOjHey7pJZIcwWBUUwwuKH52XOpM8jlgAngVMAWsAb4AXAscCjwMnFFVT/ZVI2y3zt9j\ncP3hOeAh4I+nz0f3IclxDO7Idy8v3MPgQgbfjnENY3I8d1DnHzBex/P1DC5kv6w9/ndV/fckBzFe\nx3N7dV7FGB3PaUneAfyXNktt1sdyUQeOJKk7i/mUmiSpQwaOJKkTBo4kqRMGjiSpEwaOJKkTBo4k\nqRMGjiSpEwaOJKkT/x/8fWRzTFRVZgAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": 
"display_data" } ], "source": [ "df.signature_length.plot.hist()" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZwAAAEACAYAAACH5cABAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFsVJREFUeJzt3X+s3XWd5/Hni8sgosAWZ2mzLQiMoOgwy9Sl7oad7NUJ\nP9zJCDMTsePsgso6ZouDu2Y3gtnYNmYyA1kNuBvMRFF+jKaLzAgYWSgErhsnYIvCUC1TmuwWabGV\nAK0yGinw3j/Ot3C43pZz23s+x3v6fCQn/Z73+X6+38/3ftP7up/v+ZzvSVUhSdKwHTLqDkiSDg4G\njiSpCQNHktSEgSNJasLAkSQ1YeBIkpoYauAkWZLkniQ/SLIhyZ919ZVJtib5Xvc4t6/N5Uk2J3kk\nydl99aVJHk7yaJKr+uqHJVnTtbkvyfF9r13Urb8pyYXDPFZJ0r5lmJ/DSbIIWFRVDyV5PfBd4Dzg\nfcBPq+qz09Y/FfgqcAawBLgbOLmqKsl3gI9W1foktwNXV9WdSf4jcFpVrUjyPuAPqmp5kgXAA8BS\nIN2+l1bVrqEdsCRpr4Y6wqmq7VX1ULf8LPAIsLh7OTM0OQ9YU1XPV9UWYDOwrAuuI6tqfbfeDcD5\nfW2u75ZvBt7VLZ8DrK2qXVW1E1gLvDSSkiS11ew9nCQnAKcD3+lKH03yUJIvJjm6qy0GHu9rtq2r\nLQa29tW38nJwvdSmql4AdiU5Zh/bkiSNQJPA6S6n3Qx8rBvpXAOcVFWnA9uBz8zl7uZwW5KkOXLo\nsHeQ5FB6YXNjVd0KUFVP9q3yBeAb3fI24Li+15Z0tb3V+9s8kWQCOKqqnk6yDZic1ubeGfrnzeQk\naT9U1az+wG8xwvkSsLGqrt5T6N6T2eMPge93y7cBy7uZZycCbwLWVdV2epfKliUJcCFwa1+bi7rl\n9wL3dMt3AmclObqbQHBWV/slVTW2j5UrV468Dx6fx3cwHt84H1vV/v2dPtQRTpIzgT8BNiR5ECjg\nk8D7k5wOvAhsAT4CUFUbk9wEbAR2Ayvq5SO7BLgOOBy4varu6OrXAjcm2Qw8BSzvtvVMkk/Tm6lW\nwOrqTR6QJI3AUAOnqv4OmJjhpTtmqO1p8xfAX8xQ/y5w2gz1XwAX7GVb19ELKUnSiHmngTE3OTk5\n6i4Mlcc3v43z8Y3zse2voX7wcz5IUgf7z0CSZisJ9Ss4aUCSJANHktSGgSNJasLAkSQ1YeBIkpow\ncCRJTRg4kqQmDBxJUhNDv1v0OPnTP/049923/tVXnCMnnXQ8t9zy1/TuVypJ85t3GpjFnQYWLFjC\nzp1XA8cOt1Mv+Tc8//xuJiZmuh2dJI3O/txpwBHOrL2D3lfrDJ8jG0njxPdwJElNGDiSpCYMHElS\nEwaOJKkJA0eS1ISBI0lqwsCRJDVh4EiSmjBwJElNGDiSpCYMHElSEwaOJKkJA0eS1ISBI0lqwsCR\nJDVh4EiSmjBwJElNGDiSpCYMHElSEwaOJKkJA0eS1ISBI0lqwsCRJDUx1MBJsiTJPUl+kGRDkku7\n+oIka5NsSnJnkqP72lyeZHOSR5Kc3VdfmuThJI8muaqvfliSNV2b+5Ic3/faRd36m5JcOMxjlSTt\n27BHOM8DH6+qtwH/CrgkyVuAy4C7q+rNwD3A5QBJ3gpcAJwKvBu4Jkm6bX0e
uLiqTgFOSXJOV78Y\neLqqTgauAq7strUA+BRwBvAOYGV/sEmS2hpq4FTV9qp6qFt+FngEWAKcB1zfrXY9cH63/B5gTVU9\nX1VbgM3AsiSLgCOran233g19bfq3dTPwrm75HGBtVe2qqp3AWuDcuT9KSdIgmr2Hk+QE4HTgfmBh\nVe2AXigBx3arLQYe72u2rastBrb21bd2tVe0qaoXgF1JjtnHtiRJI3Boi50keT290cfHqurZJDVt\nlenPD2h3s22watWql5YnJyeZnJycw+5I0vw3NTXF1NTUAW1j6IGT5FB6YXNjVd3alXckWVhVO7rL\nZT/u6tuA4/qaL+lqe6v3t3kiyQRwVFU9nWQbMDmtzb0z9bE/cCRJv2z6H+OrV6+e9TZaXFL7ErCx\nqq7uq90GfKBbvgi4ta++vJt5diLwJmBdd9ltV5Jl3SSCC6e1uahbfi+9SQgAdwJnJTm6m0BwVleT\nJI3AUEc4Sc4E/gTYkORBepfOPglcAdyU5EPAY/RmplFVG5PcBGwEdgMrqmrP5bZLgOuAw4Hbq+qO\nrn4tcGOSzcBTwPJuW88k+TTwQLff1d3kAUnSCOTl3+cHpyQ16M9gwYIl7Nx5P72rc8OXTLB793NM\nTEw02Z8kDSoJVTWr98y904AkqQkDR5LUhIEjSWrCwJEkNWHgSJKaMHAkSU0YOJKkJgwcSVITBo4k\nqQkDR5LUhIEjSWrCwJEkNWHgSJKaMHAkSU0YOJKkJgwcSVITBo4kqQkDR5LUhIEjSWrCwJEkNWHg\nSJKaMHAkSU0YOJKkJgwcSVITBo4kqQkDR5LUhIEjSWrCwJEkNWHgSJKaMHAkSU0YOJKkJgwcSVIT\nBo4kqQkDR5LUhIEjSWpiqIGT5NokO5I83FdbmWRrku91j3P7Xrs8yeYkjyQ5u6++NMnDSR5NclVf\n/bAka7o29yU5vu+1i7r1NyW5cJjHKUl6dcMe4XwZOGeG+meramn3uAMgyanABcCpwLuBa5KkW//z\nwMVVdQpwSpI927wYeLqqTgauAq7strUA+BRwBvAOYGWSo4dyhJKkgQw1cKrq28AzM7yUGWrnAWuq\n6vmq2gJsBpYlWQQcWVXru/VuAM7va3N9t3wz8K5u+RxgbVXtqqqdwFrgpZGUJKm9gQInyWlzvN+P\nJnkoyRf7Rh6Lgcf71tnW1RYDW/vqW7vaK9pU1QvAriTH7GNbkqQRGXSEc02SdUlWzMGlqWuAk6rq\ndGA78JkD3F6/mUZOkqRfAYcOslJV/U6Sk4EPAd9Nsg74clXdNdsdVtWTfU+/AHyjW94GHNf32pKu\ntrd6f5snkkwAR1XV00m2AZPT2ty7tz6tWrXqpeXJyUkmJyf3tqokHZSmpqaYmpo6oG2kqgZfufdL\n/Xzgc8BP6I0oPllVf7uPNicA36iq07rni6pqe7f8n4Ezqur9Sd4KfIXem/yLgbuAk6uqktwPXAqs\nB74JfK6q7kiyAvjNqlqRZDlwflUt7yYNPAAspTeKewB4e/d+zvT+1aA/gwULlrBz5/308mv4kgl2\n736OiYmJJvuTpEEloapmdVVpoBFOkt8CPgj8Hr0g+P2q+l6SfwbcB8wYOEm+Sm+k8YYkPwRWAu9M\ncjrwIrAF+AhAVW1MchOwEdgNrOhLgkuA64DDgdv3zGwDrgVuTLIZeApY3m3rmSSfphc0BayeKWwk\nSe0MNMJJ8i3gi8DNVfXzaa/9+6q6cUj9GzpHOJI0e0Mb4dAb2fy8mwlGkkOAw6vqZ/M5bCRJ7Qw6\nS+1u4LV9z4/oapIkDWTQwDm8qp7d86RbPmI4XZIkjaNBA+cfkyzd8yTJ24Gf72N9SZJeYdD3cP4T\n8LUkT9CbCr0IeN/QeiVJGjuDfvBzfZK3AG/uSpuqavfwuiVJGjeDjnCgd+flE7o2S7spcTcMpVeS\npLEz6Ac/bwR+A3gIeKErF707N0uS9KoG
HeH8C+CtA39CUpKkaQadpfZ9ehMFJEnaL4OOcH4d2Njd\nJfoXe4pV9Z6h9EqSNHYGDZxVw+yEJGn8DTot+ltJ3kjv6wLuTnIE4B0lJUkDG/Qrpj8M3Az8VVda\nDNwyrE5JksbPoJMGLgHOpPela1TVZuDYYXVKkjR+Bg2cX1TVc3ueJDmU3udwJEkayKCB860knwRe\nm+Qs4GvAN4bXLUnSuBk0cC4DngQ20PtK6NuB/zasTkmSxs+gs9ReBL7QPSRJmrVB76X2/5jhPZuq\nOmnOeyRJGkuzuZfaHocD7wWOmfvuSJLG1UDv4VTVU32PbVV1FfB7Q+6bJGmMDHpJbWnf00PojXhm\n8106kqSD3KCh8Zm+5eeBLcAFc94bSdLYGnSW2juH3RFJ0ngb9JLax/f1elV9dm66I0kaV7OZpXYG\ncFv3/PeBdcDmYXRKkjR+Bg2cJcDSqvopQJJVwDer6t8Nq2OSpPEy6K1tFgLP9T1/rqtJkjSQQUc4\nNwDrkny9e34+cP1wuiRJGkeDzlL78yT/G/idrvTBqnpweN2SJI2bQS+pARwB/KSqrga2JjlxSH2S\nJI2hQb9ieiXwCeDyrvRrwF8Pq1OSpPEz6AjnD4D3AP8IUFVPAEcOq1OSpPEzaOA8V1VF9xUFSV43\nvC5JksbRoIFzU5K/Av5Jkg8Dd+OXsUmSZmHQryf478DNwN8AbwY+VVX/49XaJbk2yY4kD/fVFiRZ\nm2RTkjuTHN332uVJNid5JMnZffWlSR5O8miSq/rqhyVZ07W5L8nxfa9d1K2/KcmFgxynJGl4XjVw\nkkwkubeq7qqq/1pV/6Wq7hpw+18GzplWuwy4u6reDNxDNxEhyVvp3YH6VODdwDVJ0rX5PHBxVZ0C\nnJJkzzYvBp6uqpOBq4Aru20tAD5F73Y87wBW9gebJKm9Vw2cqnoBeHF/fmFX1beBZ6aVz+PlD41e\nT+9DpNCblLCmqp6vqi307tO2LMki4MiqWt+td0Nfm/5t3Qy8q1s+B1hbVbuqaiewFjh3tv2XJM2d\nQe808CywIclddDPVAKrq0v3Y57FVtaNrvz3JsV19MXBf33rbutrzwNa++tauvqfN4922XkiyK8kx\n/fVp25IkjciggfO33WMYag63lVdfRZI0CvsMnCTHV9UPq2ou75u2I8nCqtrRXS77cVffBhzXt96S\nrra3en+bJ5JMAEdV1dNJtgGT09rcu7cOrVq16qXlyclJJicn97aqJB2UpqammJqaOqBtpPfxmr28\nmHyvqpZ2y39TVX806x0kJwDfqKrTuudX0Huj/4oknwAWVNVl3aSBr9B7k38xcBdwclVVkvuBS4H1\nwDeBz1XVHUlWAL9ZVSuSLAfOr6rl3aSBB4Cl9N6negB4e/d+zvT+1b5+Bv0WLFjCzp3308uv4Usm\n2L37OSYmJprsT5IGlYSqmtVVpVe7pNa/sZP2o0NfpTfSeEOSHwIrgb8EvpbkQ8Bj9GamUVUbk9wE\nbAR2Ayv6kuAS4DrgcOD2qrqjq18L3JhkM/AUsLzb1jNJPk0vaApYPVPYSJLamc0I56XlceIIR5Jm\nbxgjnH+e5Cf0Rjqv7ZbpnldVHbUf/ZQkHYT2GThV5Z/WkqQ5MZvvw5Ekab8ZOJKkJgwcSVITBo4k\nqQkDR5LUhIEjSWrCwJEkNWHgSJKaMHAkSU0YOJKkJgwcSVITBo4kqQkDR5LUhIEjSWrCwJEkNWHg\nSJKaMHAkSU0YOJKkJgwcSVITBo4kqQkDR5LUhIEjSWrCwJEkNWHgSJKaMHAkSU0YOJKkJgwcSVIT\nBo4kqQkDR5LUhIEjSWrCwJEkNWHgSJKaMHAkSU0YOJKkJkYWOEm2JPn7JA8mWdfVFiRZm2RTkjuT\nHN23/uVJNid5JMnZffWlSR5O8miSq/rqhyVZ07W5L8nxbY9QktRvlCOcF4HJqvrtqlrW1S4D7q6q\nNwP3
AJcDJHkrcAFwKvBu4Jok6dp8Hri4qk4BTklyTle/GHi6qk4GrgKubHFQkqSZjTJwMsP+zwOu\n75avB87vlt8DrKmq56tqC7AZWJZkEXBkVa3v1ruhr03/tm4GfnfOj0CSNLBRBk4BdyVZn+Q/dLWF\nVbUDoKq2A8d29cXA431tt3W1xcDWvvrWrvaKNlX1ArAzyTHDOBBJ0qs7dIT7PrOqfpTknwJrk2yi\nF0L9pj8/ENnbC6tWrXppeXJyksnJyTncrSTNf1NTU0xNTR3QNkYWOFX1o+7fJ5PcAiwDdiRZWFU7\nustlP+5W3wYc19d8SVfbW72/zRNJJoCjqurpmfrSHziSpF82/Y/x1atXz3obI7mkluSIJK/vll8H\nnA1sAG4DPtCtdhFwa7d8G7C8m3l2IvAmYF132W1XkmXdJIILp7W5qFt+L71JCJKkERnVCGch8PUk\n1fXhK1W1NskDwE1JPgQ8Rm9mGlW1MclNwEZgN7CiqvZcbrsEuA44HLi9qu7o6tcCNybZDDwFLG9z\naJKkmeTl39sHpyQ16M9gwYIl7Nx5P70rd8OXTLB793NMTEw02Z8kDSoJVbXX98Zn4p0GJElNGDiS\npCYMHElSEwaOJKkJA0eS1ISBI0lqwsCRJDVh4EiSmjBwJElNGDiSpCYMHElSEwaOJKkJA0eS1ISB\nI0lqwsCRJDVh4EiSmjBwJElNGDiSpCYMHElSEwaOJKkJA0eS1ISBI0lqwsCRJDVh4EiSmjBwJElN\nGDiSpCYMHElSEwaOJKkJA0eS1ISBI0lqwsCRJDVh4EiSmjBwJElNGDiSpCYMHElSE2MfOEnOTfIP\nSR5N8olR92e2Fi/+DZI0eSxadMKoD1fSGBvrwElyCPA/gXOAtwF/nOQto+3V7OzY8RhQB/C4d+B1\ne/uaX6ampkbdhaHy+OavcT62/TXWgQMsAzZX1WNVtRtYA5w34j41NjWLdV/TbDQ1VyOqcf9P7fHN\nX+N8bPvr0FF3YMgWA4/3Pd9KL4Q0o1/QG+20sWNHmu1L0uiNe+DMqcMO+zWOPPIDJK9tsr9nnz2E\nqheb7Gs0eiOqA7V69eqB1jvkkCN48cWfHfD+BrVw4RvZvn1Ls/1Jv+pS1e4v2taS/EtgVVWd2z2/\nDKiquqJvnfH9AUjSEFXVrP5iHPfAmQA2Ab8L/AhYB/xxVT0y0o5J0kForC+pVdULST4KrKU3QeJa\nw0aSRmOsRziSpF8d4z4tep/m+4dCX02SLUn+PsmDSdaNuj8HKsm1SXYkebivtiDJ2iSbktyZ5OhR\n9vFA7OX4VibZmuR73ePcUfZxfyVZkuSeJD9IsiHJpV19LM7fDMf3Z119XM7fa5J8p/tdsiHJyq4+\nq/N30I5wug+FPkrv/Z0ngPXA8qr6h5F2bA4l+b/A26vqmVH3ZS4k+dfAs8ANVfVbXe0K4KmqurL7\no2FBVV02yn7ur70c30rgp1X12ZF27gAlWQQsqqqHkrwe+C69z8R9kDE4f/s4vvcxBucPIMkRVfWz\n7r3xvwMuBf6IWZy/g3mEczB8KDSM0Tmuqm8D08PzPOD6bvl64PymnZpDezk+6J3Hea2qtlfVQ93y\ns8AjwBLG5Pzt5fgWdy/P+/MHUFV7PlPwGnrv/xezPH9j88toP8z0odDFe1l3virgriTrk3x41J0Z\nkmOragf0/tMDx464P8Pw0SQPJfnifL3k1C/JCcDpwP3AwnE7f33H952uNBbnL8khSR4EtgN3VdV6\nZnn+DubAORicWVVLgX8LXNJdshl343aN+BrgpKo6nd5/9Hl9aaa73HQz8LFuJDD9fM3r8zfD8Y3N\n+auqF6vqt+mNTJcleRuzPH8Hc+BsA47ve76kq42NqvpR9++TwNcZz9v67EiyEF66jv7jEfdnTlXV\nk/XyG61fAM4YZX8ORJJD6f0yvrGqbu3KY3P+Zjq+cTp/e1TVT+jdpP
FcZnn+DubAWQ+8KckbkxwG\nLAduG3Gf5kySI7q/tkjyOuBs4Puj7dWcCK+8Jn4b8IFu+SLg1ukN5plXHF/3n3iPP2R+n8MvARur\n6uq+2jidv186vnE5f0l+fc/lwPTu7XUWvfepZnX+DtpZatCbFg1czcsfCv3LEXdpziQ5kd6opui9\nwfeV+X58Sb4KTAJvAHYAK4FbgK8BxwGPARdU1c5R9fFA7OX43knv/YAXgS3AR/ZcM59PkpwJ/B9g\nAy9/J8Yn6d394ybm+fnbx/G9n/E4f6fRmxRwSPf4X1X150mOYRbn76AOHElSOwfzJTVJUkMGjiSp\nCQNHktSEgSNJasLAkSQ1YeBIkpowcCRJTRg4kqQm/j8MfkbhCQJ44QAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.word_count.plot.hist()" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAEQCAYAAAC0v9O7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X90VfWZ7/H3QwCJUH5EUaZV+bGqY5iBFaFl2UE7x1Ez\n4FXE1ps0pXN1htVb7VCdKesund5b5Dpz2+JqO6u3HVhrWqa2Y4ykTKXC1Br8cToTZjQUTcGCWq+A\n4IwQBVSQWiTP/WPvhJxk5/zMOfuc5PNa66yc7Od8z3nOJuTJ3t/nfLe5OyIiIj1GxZ2AiIiUFxUG\nERFJocIgIiIpVBhERCSFCoOIiKRQYRARkRQlKwxmdpaZPWNmz5nZLjO7J9w+xczazOxFM3vMzCaV\nKicRERnISvk5BjM7293fNbMqYBtwB/BJ4E13v8/M7gKmuPvdJUtKRERSlPRUkru/G949CxgNOHAj\n8INw+w+ApaXMSUREUpW0MJjZKDN7Dngd2Oru24Hz3f0QgLu/DpxXypxERCRVqY8Yut39MuACYIGZ\n/R7BUUPKw0qZk4iIpBodx4u6+9tmlgQWAYfM7Hx3P2Rm04DDUWPMTAVDRCQP7m65PL6UXUnn9nQc\nmVk1cC2wB3gEuDV82C3ATwZ7Dncvq9s999wTew7KaXjlpZyU01Df8lHKI4bfAX5gZqMICtIGd/+p\nmT0NtJrZnwH7gYYS5iQiIv2UrDC4+y5gXsT2I8A1pcpDRETS0yefC5BIJOJOYQDllL1yzEs5ZUc5\nFVdJP+BWCDPzSslVRKRcmBlerpPPIiJSGVQYREQkhQqDiIikUGEQEZEUKgwiIpJChUFERFKoMIiI\nSAoVBhERSaHCICIiKVQYREQkhQqDiIikUGEQEZEUKgwiIpJChUFERFKoMIiISAoVBhERSaHCMIx0\ndXWxfft2urq64k5FRCqYCsMw0dKygenTL+Xaa29j+vRLaWnZEHdKIlKhdGnPYaCrq4vp0y/l5Mmn\ngLnATqqrr2L//heYOnVq3OmJSIx0ac8Rat++fYwdO4OgKADMZcyY6ezbty++pESkYqkwDAMzZszg\nt7/dB+wMt+zk1Kn9zJgxI76kRKRilawwmNkFZvakmf3KzHaZ2RfC7feY2UEzeza8LSpVTsPF1KlT\nWb9+LdXVVzFx4jyqq69i/fq1Oo0kInkp2RyDmU0Dprl7p5lNAHYANwKNwDvu/s0M4z
XHkEFXVxf7\n9u1jxowZKgoiAuQ3xzC6WMn05+6vA6+H94+b2R7gQ2E4p6Ql2tSpU1UQRKRgscwxmNkMoA54Jty0\nwsw6zex7ZjYpjpxERCRQ8sIQnkbaCNzp7seBtcAsd68jOKJIe0pJRESKq2SnkgDMbDRBUfhHd/8J\ngLv3/Zjud4HNg41fvXp17/1EIkEikShKniIilSqZTJJMJgt6jpJ+wM3Mfgi84e5f7LNtWjj/gJn9\nJfBRd/90xFhNPouI5CifyedSdiUtBP4F2AV4ePsS8GmC+YZuYB/wOXc/FDFehUFEJEdlXRgKpcIQ\nUEuqiORCS2IMc1ooT0RKQUcMFUIL5YlIPnTEMIxpoTwRKRUVhgqhhfJEpFRUGCqEFsoTkVIp6Qfc\npDBNTY0cOLCflpYWmpruoqmpMe6URGQY0uRzBampmcbRo28DFwAHqamZwJtvHo47LREpY5p8HsbW\nrVsXFoWngZeApzly5Djr1q2LOTMRGW5UGCpES0sLwZHCma4k+FC4XURk6KgwVIimpibgIH27kuC1\ncLuIyNDRHEMFOeec8zhy5DjB9Y1e0xyDiGSkOYZh7s03D7N27Te48srfYe3ab6goiEhRqF21wiQS\nCc4++2wWLFgwILZnzx46OjpYsGABtbW1MWQnIsOBjhgqyBe+8BfMnj2fW2/9CrNnz+cLX7gzq5iI\nSC40x1Ah9uzZw+zZ8wnaVYNF9OBydu/eATBoTEcOIiNbPnMMOpVUITo6OoALSW1XvSDczqAxFQYR\nyZUKQ4UI5hQOEBwN9BwVHOwz15AuJiKSPc0xVIja2lpWrPgscDlwCXA5K1Z8ltra2rQxEZFcaY6h\nwjQ3N9Pa2kpDQwPLli1LiW3ZsoVNmzaxdOlSrr/++pgyFJFyoms+D3P19dexdWuSnkX06usTPPbY\nT4GgK+k73/l7grmGA6xY8Vm+/e1vxZesiJQFFYZhbNu2bVxxxbX07zxqb99KTU2NupJEJJI++TyM\ntbW1EbWIXltbWxYdSyIi2VNhqBD19fVELaJXX1/fr2OpJ6auJBHJT8kKg5ldYGZPmtmvzGyXmd0R\nbp9iZm1m9qKZPWZmk0qVUyVZuHAh9fUJgs6ji4HLqa9PsHDhQnUliciQKtkcg5lNA6a5e6eZTQB2\nADcCfwq86e73mdldwBR3vzti/IieY+ixatUqNmzYQGNjI/fee29KTGsliUh/FTX5bGabgO+Etz90\n90Nh8Ui6+6URjx/xheHCC2dx8ODr9HQlXXjh+bz66t640xKRMlYxk89mNgOoI2ijOd/dDwG4++vA\neXHkVO6am5vDonDm0p4HDhyiubk55sxEZLgp+ZIY4WmkjcCd7n7czPofBgx6WLB69ere+4lEgkQi\nUYwUy1JraytRXUmtra0DPugmIiNXMpkkmUwW9BwlPZVkZqOBLcCj7v6tcNseINHnVNJT7j7gBPlI\nP5XU3NzMZz7zWfp/VuGBB76rwiAigyr7OQYz+yHwhrt/sc+2NcARd1+jyef0LrpoJgcOHKLn0p6a\nYxCRTMq6MJjZQuBfgF0Ep4sc+BLQAbQSfEJrP9Dg7scixo/4wgDp10oSEemvrK/H4O7bgKpBwteU\nKo9Kt3XrVtrb25kyZUrOi+ipnVVEsqG1kiqI2ViCWh60q8JvcX8fgDlz5vH88y/0xubM+V127nyu\nd6wW2RMZmSqmXVVyd+uttxIUhTPtqjCWW2+9lS1btoRF4Uxs164X2bJlCxAcKQRF4WngReBpvvOd\n77Jnz54Y3omIlDsVhgqxefNmotpVN2/ezKZNmyJjwfZsLgsqInKGCkOFuOGGG4haRO+GG25g6dKl\nkbFge//LgvbEtcieiETTHEMFCT4GMpaedtW+cwxz517Grl0v9sYGzjHcyXe+81165iA0xyAyMpR1\nu2qhVBgCs2fPZs+ePdTW1rJ79+6U2Lp162hpaa
GpqYnbb799wFh1JYmMPCoMw1xVVTXd3UbPX/1V\nVad5//33AHUdiUg0dSUNYytXrgyLwpnOo9Onq1i5cqW6jkRkSKkwVIiNGzcS1Xm0ceNGdR2JyJBS\nYagQN998M1GdRzfffLO6jkRkSGmOoYKMHn0Wp09X0dN5lDrHoK4jERlIk88jwMqVK9m4cSM333wz\n3/jGN1Ji6joSkf40+TwCJJNJDh48GHkhjmeffZYf//jHPPvss5Fju7q62L59O11dXUXOUkQqmY4Y\nKsiZD7j1LKL3G9y7gczXg25p2cDy5Z9n7NgZ/Pa3+1i/fi1NTY2lfxMiUlI6YhjG5s+fT1AU+i6i\nN4758+dnvB50V1cXy5d/npMnn+Ktt3Zw8uRTLF/+eR05iEgkFYYK0dnZSVS7amdnZ9rrQQPs27eP\nsWNnpMTHjJnOvn37SpS9iFQSFYYKUVdXR1S7al1dHQ0NDZGxYDvMmBGcPuobP3VqPzNmzChR9iJS\nSbKeYzCzO939W5m2FYvmGMBsFDCOM4vonZljyHQ96J45hjFjpnPq1H7NMYiMEEVtVzWzZ919Xr9t\nz7n7Zbm8YL5UGAJVVVV0d3czatQoTp8+nRL71Kc+xaOPPsrixYt56KGHBozdtm0bbW1t1NfXs3Dh\nwlKlLCIxKkphMLMm4NPAFcC/9gl9AOh296tzTTQfKgzpu5LGj5/Cu+++1xsbP34Mx4+/1TtWi+yJ\njEzF6kr6N+AbwAvh157bSuCPc01S8jN9+nSiupKmT5/OfffdFxaFM7ETJ05x3333Abq0p4jkJmNh\ncPf97p5094+5+8/73J71nqvESNG9+uqrRHUevfrqq7S0tETGgu26tKeI5CbrriQz+4SZ/drM3jKz\nt83sHTN7u5jJyRkXXXQRUZ1HF110EU1NTZGxYLsu7Skiucll8vll4AZ3z+v8g5mtB64HDrn73HDb\nPcBngcPhw77k7j8bZLzmGNJ0JU2YMIkTJ071xgbOMWiRPZGRqNhdSdvcPe9WFjO7AjgO/LBfYXjH\n3b+ZxfgRXxgg+Efu0X9/zJo1i7179zJz5kxeeeWVAWObm5tpbW2loaGBZcuWpcS6urrYt28fM2bM\nYOrUqcVJXkRKrtiF4VvANGAT8F7Pdnf/cQ4JTgc29ysMx939G+lHqjBA+q6kdDGA+vrr2Lo12Ruv\nr0/w2GM/BbSOkshwVuy1kiYC7wL1wA3h7fpcXmwQK8ys08y+Z2aThuD5hqXgr/iBXUlTp07l6quv\njowF24PPLwRF4Uy8rS3Jtm3btI6SiAwwOtsHuvufFuH11wL3urub2d8A3wSWD/bg1atX995PJBIk\nEokipFSe3njjDeBi+ncevfHGy7S3twPTB8SC7dDW1kZU11JbWxtjx45l7NgZnDw5cB0lnVISqTzJ\nZDJyWf5c5HIq6fvAgAe7+59l/WL9TiVlGwvjI/pU0tSpU3njjRMEf/XPJegsupxzzx3P3LlzefLJ\nfx8Q+6M/+hhPPPEE27Zt44orrh0Qb2/fyiWXXML06Zdy8uRTvbHq6qvYv/8FFQaRYSCfU0lZHzEA\nW/rcHwfcBPxHLi8GWHgLvjGb5u6vh99+Ang+x+cbMbq6usKupMvp25XU1fUuQGTsiSeeAGDhwoXU\n1ydoazsTr69P9C6LsX79WpYvvyplHSUVBZGRK+8L9Vjwm6jd3f8gy8c/CCSAc4BDwD3AVUAd0A3s\nAz7n7ocGGT+ijxh6BEcOb3DuuecOmAe4+uqraW9v54orrugtCn2lWytJXUkiw1Oxjxj6uxg4L9sH\nu/unIzZ/v4DXH5GOHDmS8rWvEydOcPr0aU6cOBE59ujRo7z22mscPXp0QGzq1KmxFAQVJJHyk8sc\nwzsEcwwWfn0d+Ct3/6fipZfy+iP+iKGQdtU5c+bx/PMv9MbnzPlddu58rrRvoB+1yYoUX1HbVd39\nA+4+sc/XS0
pVFCRYbjuqJbWqqorLL788MhZshy1btoRF4Ux8164X2bJly8AXKhG1yYqUr5yu4GZm\nS8zs6+FtKD7DIFnq7u4mquW0u7ubX/ziF5GxYDts2rQpMh5sj4cuNypSvnJZRO9rwJ3A7vB2p5l9\npViJSapRo0YRtVDeqFGj+MhHPhIZC7bD0qVLI+PB9njocqMi5SuXOYadQJ2HJ67NrAp4brDPHQw1\nzTGkX0QvXQxg7tzL2LXrxd54Oc0x6HKjIsVT7LWSdgIJdz8Sfl8DJFUYSivdInrpYgCLFy8mmUyS\nSCR49NFHc3rdYnUPqStJpLiKXRiagK8BTxF0Jn0cuNvdN+SaaD5UGArrSjI7C6jqjZudorv7VFav\nq+4hkcpV7K6kFoKP1v4Y+CfgY6UqCtIzxzCw82jUqFGMGTMmMhZsh9tvv52gKJyJu48Jt6en7iGR\nkSeXyeebgHfd/RF3fwT4jZnFN3s5wgRHSwM7i9yd999/PzIWbIeHH344Mh5sT0/dQyIjTy7tqve4\ne+8lwdz9GMGyFlICwfzBwM4iM2P06NGRsWA73HTTTZHxYHt66h4SGXlymnzuP9FsZrvcfU5RMhv4\n+ppjKKAradSoMbiP6Y3nM8eg7iGRylPsyed/AI4Bfxdu+nOgxt1vzeUF86XCECikK2n+/Pl0dnZS\nV1fHjh07UmKZuoPi6EpSx5JI4fIpDLh7VjdgPEFX0i+A7cBXgPHZji/0FqQ6skGVQ7XDxeFXyyrm\n7j5mzPiU+Nix1b2xBx98yKura3zSpHleXV3jDz74UEneT7rXjSsnkeEm/N2Z2+/bXAcM+kTw7aF6\nrkGefyj3VcUBwl/sv3Tw8Gu1Az569OjI2OjRo93d/ctf/nJk/Mtf/rIfPnzYq6trUmLV1TV++PDh\nor6fdK8bV04iw1E+hSGntZIyWJj5IVKYgZ1FQMaupA0bNkTGN2zYEFvXUbrXVSeUSLyGsjBI0Q3s\nLAIydiU1NjZGxhsbG2PrOkr3uuqEEolZrocYg92AZ4fquQZ5/qE8uqpIYOEpoQ9HzDEMHnN3Hzu2\nOiUeNccwceJlscwxRL1uXDmJDDfkcSop70t79mdmz7n7ZUPyZNHP70OVayVL13k0ZswY3n//fUaP\nHs2pUwNbUVetWsWGDRtobGzk3nvvTYnF1QGkriSR4ipqu2oWL36ru98/JE8W/fwqDBTWrjpr1iz2\n7t3LzJkzeeWVV1Jie/bsoaOjgwULFlBbWztgbL7Xi043TkSKryjtqsBm4JHBbrkeouR7Q6eSCmpX\nTRdfseLOcNslDtW+YsUdKWOvvXZxytj6+sW9sXRtpenGiUhpUIx2VeAPw9u3gA3ADeHtQeBvc33B\nfG8jvTCQpl01XczdfebMmZHxmTNn+u7duyNju3fvdnf39vb2yHh7e3vattJ040SkdPIpDBm7ktz9\n5+7+c2Chuze6++bw9mngypwOT6RA0e2qmWJ79+6NjO/du5eOjg7gwn6xC8Lt0NbWFjm2ra0tbVtp\nunEiUt5yaVcdb2azer4xs5kEn4aWkoluV80UmzlzZmR85syZLFiwADjQL3Yw3A719fWRY+vr69O2\nlaYbJyJlLttDC2AR8CqQBH4O7APqcxi/HjgE7OyzbQrQBrwIPAZMSjO+CAdZlaWQdtV08RUr7kiZ\nC+g/x1BfvzhlbNQcQ1RbabpxIlIaFKtd1YKlOy8HdgCXhptfcPf3si1AZnYFcBz4oYertJrZGuBN\nd7/PzO4Cprj73YOM92xyHe4K6UpKF7/66qtpb2/niiuu4IknnhgwdsuWLWzatImlS5dy/fXXZx1r\nbm6mtbWVhoYGli1blsU7FJGhVOxF9J7LtepEPMd0Uo8YXgDOD+9PIyg2OmIYRLG6kjKNzbfzKFO3\nk4gUH8VcRA/4OvBJws8+5HOLKAxH+sWPpBk79HusglBAV9L48eMj4+PHj/cb
b7wxMnbjjTe6e/rF\n7tJ1HmXqdhKR0sinMIzO4eDic8AXgdNmdhKw8AUn5nSIkl7ac0WrV6/uvZ9IJEgkEkP40pUgqvPo\n5YyxEydOABcPiJ848TKPP/545Nhg+5nF7k6ezK3zaNasWQzW7RT1AToRGRrJZJJkMlnYk+RaSQq5\nMfCIYQ+pp5L2pBk7xHW0sqAjBhHJA8W+HgOwhOCU0teB63N+MZgB7Orz/RrgrvD+XcDX0owtyk6r\nJMXqSso0Nt/Oo0zdTiJSfPkUhlwu7fk14KNAc7ipCfiFu/9VluMfBBLAOQRtq/cAm4AfEZxz2A80\nuPuxQcZ7trkOZ4V0JU2YMIETJ04wfvx4jh8/nhJbunQpjz/+ONdccw2bNm0aMDbf9ZAyrcEkIsWV\nT1dSLnMM1wF1Hl5h3sx+ADwHZFUYPPikdJRrcshBChDMNZz52teBAwc4efIkBw4ciBy7ceNGWlpa\naGpq4vbbb8/6NWtra1UQRCpMLkcMO4GEux8Jv68Bkh5+JqHYdMQAZqOBsQQTvgeB3xDW6bSxQsfW\n1Ezj6NG3e+M1NRN4883DANTXX8fWrcneWH19gsce+2lxdoCI5CyfI4ZclsT4CvCsmd0fHi3sAP5P\nLi8m+QtOE40FngZeCr+Ow8zSxjKNnT9/fmQs2A7r1q0Li8KZ+JEjx1m3bh3btm0Li8KZWFtbkm3b\nthV9f4hI8eRSGK4H/oGgIGwEPubuG4qSlQwiv0X00sU7OzsjY8F2aGlpiYy3tLRooTyRYSqXwrA+\n/LqEYAnuvzOzO4c+JRlcfovopYvX1dVFxoLt0NTUFBlvamrSQnkiw1ROV3AzsyqCzqSrgNuAk+5+\nafpRQ0NzDBAsWTWO4K/910idJxg8VujYc845jyNHjvfG+84x/PEfX0dbW7I3pjkGkfJS1Et7mtkT\nBMts/zvwr0C7ux/OOcs8qTAEirWIXqax6RbZK9blO3XNZ5HCFbsw/C0wH3gP2Ab8C/Dv7n4y10Tz\nocIQX1dSVVU13d3WG6+qOs3772e9sG5eWlo2sHz55xk7Nrjmw/r1a2lqaizqa4oMR0XtSnL3v3T3\njwOfAN4Evg9EfhhNhl6xupKmTJkSGQu2w8qVK8OicCZ++nQVK1euLNp77erqYvnyz3Py5FO89dYO\nTp58iuXLP09XV1fRXlNEzsi6MJjZCjPbQPChthsJOpQWFysxiTL0XUnHjh2LjAXbgw+2RcWD7cWR\n7pKhIlJ8uXQljQO+CVzq7te4+/929yeLlJdEGvqupMmTJ0fGgu1w8803R8aD7cWR7pKhIlJ8OXUl\nxUlzDPF1JY0efRanT1f1xks5xzBmzHROndqvOQaRPBV18jluKgyBuLqSFi9eTDKZJJFI8Oijj6bE\n1JUkUr5UGIa5uLqSLrxwFgcPvt4bv/DC83n11b2A1koSKXfFXitJYlSsrqRMY5ubm8OicCZ+4MAh\nmpubtVaSyDClwlBRhr4rKVOstbU1Mt7a2qq1kkSGKRWGijL0XUmZYg0NDZHxhoYGrZUkMkxpjqGC\nxNWVdNFFMzlw4FBvvO8cg9ZKEilvmnweAeLqSmpubqa1tZWGhgaWLVuWEitWV5KIFK7Yl/aUEsj0\nCzou7e3tPPPMM3zwgx8cUBguueQSxo4dO+QfQFO7qkg8dMRQRorZclrY2LOAqt642Sm6u08BxVvs\nTovoiQyNfI4YcPeKuAWpDl+AQ7XDLx08/FrtPe87XbyYY2+77bbI+G233eaHDx/26uqalFh1dY0f\nPny4oH1RrOcVGYnC/8s5/b5VV1JZKU7LaSFjH3744cj4ww8/XLTF7rSInki8VBjKSnFaTgsZe9NN\nN0XGb7rppqItdqdF9ETiVRZzDGa2D3gL6AZOufuCiMd4OeRaTMVsOS1k7KhRY3Af0xuPmmMY6sXu\ntIieyNCo2HZVM3sFmO/uR9M8ZtgXBihu
y2khY6uqquju7mbUqFGcPn06JbZq1So2bNhAY2Mj9957\nb0psz549dHR0sGDBAmprawc8b7rOI3UliRSuYiefgb3AORkeU9gMTAWAqnCi9+Lwq2Udj2vs2WdP\nTomNHz+xN7ZixZ1h7BKHal+x4o6U533wwYe8urrGJ02a59XVNf7ggw/lv/NEJBJ5TD7HXhSCvHkF\neBbYDnx2kMcM8e4qL5RpV9KECRMi4xMmTPA1a9ZExtasWeO7d++OjO3evdvd1XkkUir5FIZy+YDb\nQnf/TzObCmw1sz3u3t7/QatXr+69n0gkSCQSpcuwJKK6g17OMl6cscePHwcuHhA/fvxlWlpaIse2\ntLRw/vnnAxf2i11AR0cHtbW1vZ1HJ08O7DzSaSOR/CWTSZLJZGFPkmslKfYNuAf4YsT2Iayh5Qcd\nMeiIQaQIqMRTScDZwITw/nhgG1Af8bih3l9lByz8Zfphjz7XP3g8rrHjx09MiaXOMdzhfecfBptj\nmDjxMs0xiBRJPoUh9q4kM5sJPEzw1+tooNndvxbxOI8711Io166kdPFx48bx3nvvcdZZZ/Gb3/wm\nJbZu3TpaWlpoamri9ttvH/C8W7ZsYdOmTSxdupTrr78+JaauJJHCVWxXUjY3RsQRQ+V1JaWL/f7v\nX5YSmzOnLuV5r712cUq8vn5xb0wdSyJDg0o8lZR1osO8MFCmcwzp4pMnT46MTZ482Tdv3hwZ27x5\ns7u7t7e3R8bb29s1/yAyhPIpDFoSo6yU31pJ6eLHjh2LjB07doxNmzZFxoLtpL0sqNZKEomXCkNZ\nKb+1ktLFJ0+eHBmbPHkyS5cujYwF20l7WVCtlSQSs1wPMeK6McxPJbl72u6fTPFyHDtnTl1KrP8c\nQ3394pR41ByDOpZECkMldiVlS11JmeNxjZ0yZQrHjh1j8uTJHD2autxVuq4jSH9ZUHUliRSuYhfR\ny4YKQ+Z4OY7N9Lzz58+ns7OTuro6duzYkRJTq6tI4VQYKlz5Xtozv7GFPG9NzTSOHn27N1ZTM4E3\n3zzcO3bOnHk8//wLvfE5c36XnTufA3RZUJG+9DmGCkYFtqsW8rzz5s2LjM+bN8/Xrl0bGVu7dq27\ne9pWWLW6iqRC7aqVrrLaVQuJdXZ2RsY7OzvTLs4HpG2FVaurSOFUGMpKZbWrFhKrq6uLjNfV1dHU\n1BQZC7aTthVWra4ihdMcQxkp10t75ju2kOc955zzOHLkeG+s/xzD3LmXsWvXi73xqDkGXRZURJPP\nFaPSOosKGRtXTpdccgm//vWvufjii3nppZcGjF28eDHJZJJEIsGjjz6aEkvXQgvQ3NxMa2srDQ0N\nLFu2bEB8MJk6pTJdBlUkH5p8rgCVthBeIWPLMadM8XQL+7m7X3DBzJT4hRfO8GxkWhQw02VQRfKF\nFtErb1RYZ1EhY+PKqba2NjJWW1vr7u6LFi2KjC9atCjtwn7u7g888EBk/IEHHkj7756pUyrTRY1E\nCqHCUOaCX2gXh//5e24f7vPLMDpWiWPLMSd393HjxkXGx40b56tWrYqMrVq1yt3dlyxZEhlfsmRJ\n2n/3jo4OnzRpXsq4iRMv846ODnd3v//++z04Uuj7vBf7/fffP8Q/gTIS5VMY1JVUcpXWWVTI2NLn\nFJybHxjrOWcfXCd8YDyRSKRd2A+goaEhMh5sH1ymTqkFCxYAB/o978Fwu0gMcq0kcd0YBkcM7u6V\nthBeIWPLMadM8XQL+7m7X3jhjJR4rnMMgy0KmOkyqCL5Io8jBnUlxaAcu3iGW1fS7Nmz2bNnD7W1\ntezevXvAWHUlyUihrqQSOnz4sHd0dEQutXDeeec54Oedd17k2HRxes+TR7/fdPFyHFuOOcU1thxz\nyhQ3Mwc8/MMsxS233OI1NTV+yy23RD5vY2OjT5w40RsbG3OKuQdNAuPGjfNFixYNiK1du9avvPLK\n3iVS
colv3rzZly9f3nslwf52797t999/f+TE/wMPPOBLliyJbDZINy6beDqFjHXP74gh9l/4WSda\nRoUhXetcJNdpAAAKb0lEQVRhJbZoql1V+yL39zOmX6yq3/OO7hcflVUs0+tOmXJ+SqymZmrK2HTx\nTNcgT9cynK5NOVOrcSGtyEPRxqzCUALpWg+DI4GBbYc9Rwbp4sFfbOXVclrI2HLMSfsi+7HBkcLA\nmJn5LbfcEhnrOXJobGyMjDc2NqaNuadvJ860uGK6eKZrkKdrGU7Xppyp1biQVuShamNWYSiBdK2H\nwX+08mvRjGNsOeakfTE0Y2tqaiJjNTU17u4+ceLEyPjEiRPTxtzTtxNfeeWVkbErr7zS3T1tfPny\n5ZGx5cuXu3v6luF0bcqZWo0LaUUeqjZmFYYS0BHD8P0rWftCRww6YiijwgAsAl4AXgLuGuQxOe2M\nYkrXeliJLZpqV9W+yP39VPWL9Z9jGNUvPiqrWKbXramZmhLrP8eQLp7pGuTpWobTtSlnajUupBV5\nKNqYK7IwECz9/TIwHRgDdAKXRjwu5x1STOpKqtyc4hpbjjlliqsr6YyR1JUU++cYzOxy4B53Xxx+\nf3f4Rtb0e5zHnauISKXJ53MM5bAkxocI1gPocZCBlxATEZESGR13ArlYvXp17/1EIhGueyMiIj2S\nySTJZLKg5yiXU0mr3X1R+L1OJYmIDJFKPZW0HfiwmU03s7HAp4BHYs5JRGTEiv1UkrufNrMVQBtB\noVrv7ntiTktEZMSK/VRStnQqSUQkd5V6KklERMqICoOIiKRQYRARkRQqDCIikkKFQUREUqgwiIhI\nChUGERFJocIgIiIpVBhERCSFCoOIiKRQYRARkRQqDCIikkKFQUREUqgwiIhIChUGERFJocIgIiIp\nVBhERCSFCoOIiKRQYRARkRQqDCIikkKFQUREUqgwiIhIilgLg5ndY2YHzezZ8LYoznxERKQ8jhi+\n6e7zwtvP4k4mF8lkMu4UBlBO2SvHvJRTdpRTcZVDYbC4E8hXOf4gKKfslWNeyik7yqm4yqEwrDCz\nTjP7nplNijsZEZGRruiFwcy2mtnOPrdd4dcbgLXALHevA14HvlnsfEREJD1z97hzAMDMpgOb3X3u\nIPHySFREpMK4e06n7EcXK5FsmNk0d389/PYTwPODPTbXNyYiIvmJtTAA95lZHdAN7AM+F286IiJS\nNqeSRESkPJRDV1LWyuUDcWa2yMxeMLOXzOyuOHKIYmb7zOyXZvacmXXElMN6MztkZjv7bJtiZm1m\n9qKZPVbq7rNBcor1Z8nMLjCzJ83sV2FDxh3h9tj2VUROXwi3x72vzjKzZ8Kf611mdk+4Pc59NVhO\nsf+OMrNR4Ws/En6f836qqCOGcOe/4+6xdS+Z2SjgJeBq4D+A7cCn3P2FuHLqYWavAPPd/WiMOVwB\nHAd+2NNIYGZrgDfd/b6wkE5x97tjzinWnyUzmwZMc/dOM5sA7ABuBP6UmPZVmpwaif//3dnu/q6Z\nVQHbgDuATxLvz1VUTouJf1/9JTAfmOjuS/L5/1dRRwyhuCehFwC/dvf97n4KeIjgP085MGL+N3X3\ndqB/YboR+EF4/wfA0jLICWL8WXL31929M7x/HNgDXECM+2qQnD4UhmP9f+fu74Z3zyKYG3Xi/7mK\nygli3FdmdgFwHfC9Pptz3k+VWBji/kDch4ADfb4/yJn/PHFzYKuZbTezz8adTB/nufshCH75AOfF\nnE+PuH+WADCzGUAd8DRwfjnsqz45PRNuinVfhadHniP4vNNWd99OzPtqkJwg3n31t8D/4EyRgjz2\nU9kVBtMH4gqx0N3nEfzF8OfhKZRyVA7nL8viZyk8ZbMRuDP8K73/vin5vorIKfZ95e7d7n4ZwVHV\nAjP7PWLeVxE5zSbGfWVm/wU4FB71pTtqybif4m5XHcDdr83yod8FNh
czl0G8BlzU5/sLwm2xc/f/\nDL92mdnDBKe92uPNCoBDZna+ux8Kz2Mfjjshd+/q820sP0tmNprgF/A/uvtPws2x7quonMphX/Vw\n97fNLAksokx+rvrm1G9uodT7aiGwxMyuA6qBD5jZPwKv57qfyu6IIZ3wTfVI+4G4ItoOfNjMppvZ\nWOBTwCMx5JHCzM4O/9LDzMYD9cSzfyD4a6XvXyyPALeG928BftJ/QAmk5FQmP0v/AOx292/12Rb3\nvhqQU9z7yszO7TklY2bVwLUE8x+x7atBcnohzn3l7l9y94vcfRbB76Un3f1PCIrTreHDsttP7l4x\nN+CHwE6gE9hEcO4sjjwWAS8Cvwbujnu/hDnNDPfLc8CuuPICHiTo1noPeJWgy2YK8Hi4z9qAyWWQ\nU6w/SwR/3Z3u82/2bPhzVRPXvkqTU9z7ak6YS2eYx/8Mt8e5rwbLqVx+R/0h8Ei++6mi2lVFRKT4\nKupUkoiIFJ8Kg4iIpFBhEBGRFCoMIiKSQoVBRERSqDCIiEgKFQYREUmhwiAVx8z+3swuHeLnnGRm\ntw/lcw7yOveY2ReL8Lx3mtm4Pt+/M9SvISOHCoNUHHf/7z7017+YAnw+10Hh9TnKwV8A4/t8r0+u\nSt7K5YdaJFK4BtSW8EpZO82swcyeMrN5YXx5eGWqp8Mjif8bbv++mX3LzLaZ2ctm9olw+3gze9zM\nfmHB1e5uCF/qq8Cs8MpXa8zsD81sc588vm1m/y28v9fMvmZmvwBuNrNZZvZouNz5z83skizfW+S4\nNLmbma01s90WXJHrn83sExZcae2DwJNm9sSZp7e/CZd//jczm1rov4WMHCoMUu4WAa+5+2UeXH3t\nZz0BM/sd4H8RrCK7EOh/emmauy8EbgDWhNt+Ayx1948Af8SZZZHvBv6fu89z957Ltab7q/sNd/+I\nu7cCfw+scPePEqyFvy7L95ZuXFTunwQucvfZwJ8AHwNw928TrAWVcPerw8eOB/7Ng+Wf/xUop+tz\nSJkru2W3RfrZBXzdzL4K/LO7t5v1LpK6AEi6+1sAZvYj4OI+YzcBuPseM+u5OIkBXzWzjwPdwAf7\nxHKxIXzN8cAfAD+yM4mNyTQ4i3FRuS8EfhRuP2RmT/V/2j7333P3n4b3dwDXZPWuRFBhkDLn7r8O\nTxtdB/y1mT1J6l/y6S5I8l7E45YB5wKXuXu3me0Fxg0YCe+TekTd/zEnwq+jgKMeXCApF5nGReWe\ni1N97p9G/9clBzqVJGUtPF100t0fBL4O9P1Fuh34eNhRNJrgVMugTxV+nQQcDovCVcD0cPs7wAf6\nPH4/MNvMxpjZZOBqIrj7O8BeM7u5T85zM72vHMf15L4N+GQ413A+kOjzmLeBiRFjRHKmwiDlbg7Q\nYcG1dVcBf90TcPf/AL4CdBCcR98LvNUT7vc8Pd83Ax81s18CnyG44AvufgTYFk5wr3H3gwSnbZ4H\nHiJYe7//c/VYBiwPJ3qfB5Zk+d4+M8i4wXL/J4JrjP+KYN3/HX3e73eBn/WZfFZXkuRN12OQimZm\n4939hJlVAQ8D6/3MZTKHnT7vtwZ4huA637FfKlWGF513lEq32syuAc4C2oZzUQhtCU9tjQHuVVGQ\nYtARg0gRmNmXgP9KcErHwq8/cvevxpqYSBZUGEREJIUmn0VEJIUKg4iIpFBhEBGRFCoMIiKSQoVB\nRERS/H9YkFAzDEoKLgAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df.plot.scatter(x='signature_length', y='word_count')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's 
conclude:\n", "\n", "- long words have few anagrams\n", "- very short words have few anagrams\n", "- words between 4 and 12 letters typically have a couple of anagrams" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.4" } }, "nbformat": 4, "nbformat_minor": 0 }