{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Your results won't exactly match the outputs shown in this notebook, because I processed the data slightly differently on my local machine."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table style=\"border: 2px solid white;\">\n",
       "<tr>\n",
       "<td style=\"vertical-align: top; border: 0px solid white\">\n",
       "<h3 style=\"text-align: left;\">Client</h3>\n",
       "<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n",
       "  <li><b>Scheduler: </b>tcp://127.0.0.1:35429</li>\n",
       "  <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a>\n",
       "</ul>\n",
       "</td>\n",
       "<td style=\"vertical-align: top; border: 0px solid white\">\n",
       "<h3 style=\"text-align: left;\">Cluster</h3>\n",
       "<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n",
       "  <li><b>Workers: </b>4</li>\n",
       "  <li><b>Cores: </b>12</li>\n",
       "  <li><b>Memory: </b>33.35 GB</li>\n",
       "</ul>\n",
       "</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<Client: scheduler='tcp://127.0.0.1:35429' processes=4 cores=12>"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import dask.dataframe as dd\n",
    "from dask.distributed import Client, progress\n",
    "# Keep a handle on the client so the local cluster can be inspected or shut\n",
    "# down later (client.close()); the bare name on the last line still displays\n",
    "# the same client summary as the cell's output.\n",
    "client = Client()\n",
    "client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>argument_0</th>\n",
       "      <th>symbol</th>\n",
       "      <th>operation</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td>window.navigator.userAgent</td>\n",
       "      <td>get</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td>window.navigator.userAgent</td>\n",
       "      <td>get</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>window.navigator.userAgent</td>\n",
       "      <td>get</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td>window.navigator.appName</td>\n",
       "      <td>get</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td>window.navigator.appVersion</td>\n",
       "      <td>get</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  argument_0                       symbol operation\n",
       "0              window.navigator.userAgent       get\n",
       "1              window.navigator.userAgent       get\n",
       "2              window.navigator.userAgent       get\n",
       "3                window.navigator.appName       get\n",
       "4             window.navigator.appVersion       get"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Location of the dataset on disk; change this to wherever your copy lives.\n",
    "PARQUET_PATH = '/home/bird/Data/tt/full/overscripted.parquet/'\n",
    "# Only these three columns are used by the cells below, so only they are read.\n",
    "# Passed as a list, which is the documented type for read_parquet's `columns`.\n",
    "COLUMNS = ['argument_0', 'symbol', 'operation']\n",
    "\n",
    "df = dd.read_parquet(PARQUET_PATH, columns=COLUMNS)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Keep only the rows whose symbol is CanvasRenderingContext2D.fillText and\n",
    "# persist the filtered frame so the cells below can reuse it.\n",
    "fillText = (\n",
    "    df[df['symbol'] == 'CanvasRenderingContext2D.fillText']\n",
    "    .persist()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report on the persist() of fillText started in the cell above;\n",
    "# notebook=False asks for the plain-text bar rather than the notebook widget.\n",
    "progress(fillText, notebook=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>argument_0</th>\n",
       "      <th>symbol</th>\n",
       "      <th>operation</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>call_id</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__125</th>\n",
       "      <td>Soft Ruddy Foothold 2</td>\n",
       "      <td>CanvasRenderingContext2D.fillText</td>\n",
       "      <td>call</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__132</th>\n",
       "      <td>!H71JCaj)]# 1@#</td>\n",
       "      <td>CanvasRenderingContext2D.fillText</td>\n",
       "      <td>call</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__188</th>\n",
       "      <td>&lt;@nv45. F1n63r,Pr1n71n6!</td>\n",
       "      <td>CanvasRenderingContext2D.fillText</td>\n",
       "      <td>call</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__197</th>\n",
       "      <td>668</td>\n",
       "      <td>CanvasRenderingContext2D.fillText</td>\n",
       "      <td>call</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1_00021485d883465dc356bceabf4203dec5012044c643ab3498da2d1c.json__30</th>\n",
       "      <td>Soft Ruddy Foothold 2</td>\n",
       "      <td>CanvasRenderingContext2D.fillText</td>\n",
       "      <td>call</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                                  argument_0  \\\n",
       "call_id                                                                        \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...     Soft Ruddy Foothold 2   \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...           !H71JCaj)]# 1@#   \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...  <@nv45. F1n63r,Pr1n71n6!   \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...                       668   \n",
       "1_00021485d883465dc356bceabf4203dec5012044c643a...     Soft Ruddy Foothold 2   \n",
       "\n",
       "                                                                               symbol  \\\n",
       "call_id                                                                                 \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...  CanvasRenderingContext2D.fillText   \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...  CanvasRenderingContext2D.fillText   \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...  CanvasRenderingContext2D.fillText   \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...  CanvasRenderingContext2D.fillText   \n",
       "1_00021485d883465dc356bceabf4203dec5012044c643a...  CanvasRenderingContext2D.fillText   \n",
       "\n",
       "                                                   operation  \n",
       "call_id                                                       \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...      call  \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...      call  \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...      call  \n",
       "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25...      call  \n",
       "1_00021485d883465dc356bceabf4203dec5012044c643a...      call  "
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Peek at the first rows of the filtered frame.\n",
    "fillText.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[########################################] | 100% Completed |  8.6s"
     ]
    }
   ],
   "source": [
    "# Count how often each argument_0 value occurs among the fillText rows.\n",
    "# persist() kicks the computation off; progress() then reports on it as a\n",
    "# plain-text bar.\n",
    "_arg_counts = (\n",
    "    fillText['argument_0']\n",
    "    .value_counts()\n",
    "    .persist()\n",
    ")\n",
    "progress(_arg_counts, notebook=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>argument_0</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>😃</td>\n",
       "      <td>37327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Cwm fjordbank glyphs vext quiz, 😃</td>\n",
       "      <td>21436</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td>14313</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>🇺🇳</td>\n",
       "      <td>12062</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>🧚\u200d♂\ufe0f</td>\n",
       "      <td>10422</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>🧚\u200b♂\ufe0f</td>\n",
       "      <td>10422</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>🇺\u200b🇳</td>\n",
       "      <td>10422</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>45</td>\n",
       "      <td>8637</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>38</td>\n",
       "      <td>8340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>!H71JCaj)]# 1@#</td>\n",
       "      <td>8149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Soft Ruddy Foothold 2</td>\n",
       "      <td>8149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>!image!</td>\n",
       "      <td>7301</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>e</td>\n",
       "      <td>6045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>201708</td>\n",
       "      <td>5785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>201706</td>\n",
       "      <td>5785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>201704</td>\n",
       "      <td>5785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>0</td>\n",
       "      <td>5550</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>a</td>\n",
       "      <td>4787</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>i</td>\n",
       "      <td>4471</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>n</td>\n",
       "      <td>4194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>o</td>\n",
       "      <td>4096</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>t</td>\n",
       "      <td>3974</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>http://valve.github.io</td>\n",
       "      <td>3912</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>r</td>\n",
       "      <td>3621</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>s</td>\n",
       "      <td>3447</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>&lt;@nv45. F1n63r,Pr1n71n6!</td>\n",
       "      <td>3347</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>🐨</td>\n",
       "      <td>3315</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>10</td>\n",
       "      <td>3289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>11</td>\n",
       "      <td>3230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>月份</td>\n",
       "      <td>3087</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>06</td>\n",
       "      <td>2799</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>07</td>\n",
       "      <td>2794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>08</td>\n",
       "      <td>2785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>09</td>\n",
       "      <td>2782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>2,000</td>\n",
       "      <td>2706</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>$201000</td>\n",
       "      <td>2697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>$197000</td>\n",
       "      <td>2697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>$196000</td>\n",
       "      <td>2697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>$200000</td>\n",
       "      <td>2697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>$198000</td>\n",
       "      <td>2697</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           argument_0  count\n",
       "0                                   😃  37327\n",
       "1   Cwm fjordbank glyphs vext quiz, 😃  21436\n",
       "2                                      14313\n",
       "3                                  🇺🇳  12062\n",
       "4                                🧚\u200d♂\ufe0f  10422\n",
       "5                                🧚\u200b♂\ufe0f  10422\n",
       "6                                 🇺\u200b🇳  10422\n",
       "7                                  45   8637\n",
       "8                                  38   8340\n",
       "9                     !H71JCaj)]# 1@#   8149\n",
       "10              Soft Ruddy Foothold 2   8149\n",
       "11                            !image!   7301\n",
       "12                                  e   6045\n",
       "13                             201708   5785\n",
       "14                             201706   5785\n",
       "15                             201704   5785\n",
       "16                                  0   5550\n",
       "17                                  a   4787\n",
       "18                                  i   4471\n",
       "19                                  n   4194\n",
       "20                                  o   4096\n",
       "21                                  t   3974\n",
       "22             http://valve.github.io   3912\n",
       "23                                  r   3621\n",
       "24                                  s   3447\n",
       "25           <@nv45. F1n63r,Pr1n71n6!   3347\n",
       "26                                  🐨   3315\n",
       "27                                 10   3289\n",
       "28                                 11   3230\n",
       "29                                 月份   3087\n",
       "30                                 06   2799\n",
       "31                                 07   2794\n",
       "32                                 08   2785\n",
       "33                                 09   2782\n",
       "34                              2,000   2706\n",
       "41                            $201000   2697\n",
       "39                            $197000   2697\n",
       "40                            $196000   2697\n",
       "36                            $200000   2697\n",
       "38                            $198000   2697"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Bring the persisted counts back as a pandas Series and turn it into a\n",
    "# two-column frame (argument_0, count), most frequent first.\n",
    "#\n",
    "# rename_axis() + reset_index(name=...) name both columns explicitly, so the\n",
    "# result does not depend on what pandas calls the index/values of a\n",
    "# value_counts() result (that naming changed in pandas 2.0, where a\n",
    "# rename of 'index'/'argument_0' would yield two 'count' columns).\n",
    "arg_counts = (\n",
    "    _arg_counts.compute()\n",
    "    .rename_axis('argument_0')\n",
    "    .reset_index(name='count')\n",
    "    .sort_values('count', ascending=False)\n",
    ")\n",
    "arg_counts.head(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[########################################] | 100% Completed |  8.5s"
     ]
    }
   ],
   "source": [
    "# Count how often each operation value occurs among the fillText rows.\n",
    "# persist() kicks the computation off; progress() then reports on it as a\n",
    "# plain-text bar.\n",
    "_operation_counts = (\n",
    "    fillText['operation']\n",
    "    .value_counts()\n",
    "    .persist()\n",
    ")\n",
    "progress(_operation_counts, notebook=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "call            542838\n",
       "set                 58\n",
       "set (failed)         0\n",
       "get                  0\n",
       "Name: operation, dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Materialise the persisted per-operation counts as a local result and display it.\n",
    "# NOTE(review): zero-count entries in the output suggest `operation` is a\n",
    "# categorical column, so unused categories are still listed - confirm.\n",
    "operation_counts = _operation_counts.compute()\n",
    "operation_counts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}