{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Your results won't look exactly like this notebook, as I did slightly different processing locally." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<table style=\"border: 2px solid white;\">\n", "<tr>\n", "<td style=\"vertical-align: top; border: 0px solid white\">\n", "<h3 style=\"text-align: left;\">Client</h3>\n", "<ul style=\"text-align: left; list-style: none; margin: 0; padding: 0;\">\n", " <li><b>Scheduler: </b>tcp://127.0.0.1:35429</li>\n", " <li><b>Dashboard: </b><a href='http://127.0.0.1:8787/status' target='_blank'>http://127.0.0.1:8787/status</a>\n", "</ul>\n", "</td>\n", "<td style=\"vertical-align: top; border: 0px solid white\">\n", "<h3 style=\"text-align: left;\">Cluster</h3>\n", "<ul style=\"text-align: left; list-style:none; margin: 0; padding: 0;\">\n", " <li><b>Workers: </b>4</li>\n", " <li><b>Cores: </b>12</li>\n", " <li><b>Memory: </b>33.35 GB</li>\n", "</ul>\n", "</td>\n", "</tr>\n", "</table>" ], "text/plain": [ "<Client: scheduler='tcp://127.0.0.1:35429' processes=4 cores=12>" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import dask.dataframe as dd\n", "from dask.distributed import Client, progress\n", "\n", "# Start a local cluster and keep a handle on it, so it can be inspected or\n", "# closed later without relying on the notebook's `_` / Out[1] history.\n", "client = Client()\n", "# Last expression: renders the scheduler / dashboard summary shown below.\n", "client" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>argument_0</th>\n", " <th>symbol</th>\n", " <th>operation</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td></td>\n", " 
<td>window.navigator.userAgent</td>\n", " <td>get</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td></td>\n", " <td>window.navigator.userAgent</td>\n", " <td>get</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td></td>\n", " <td>window.navigator.userAgent</td>\n", " <td>get</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td></td>\n", " <td>window.navigator.appName</td>\n", " <td>get</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td></td>\n", " <td>window.navigator.appVersion</td>\n", " <td>get</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " argument_0 symbol operation\n", "0 window.navigator.userAgent get\n", "1 window.navigator.userAgent get\n", "2 window.navigator.userAgent get\n", "3 window.navigator.appName get\n", "4 window.navigator.appVersion get" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Location of the parquet dataset. This is a machine-specific path:\n", "# change this one line to point at your own copy of the data.\n", "DATA_PATH = '/home/bird/Data/tt/full/overscripted.parquet/'\n", "# Only the columns used below are read (passed as a list, the documented form).\n", "COLUMNS = ['argument_0', 'symbol', 'operation']\n", "\n", "df = dd.read_parquet(DATA_PATH, columns=COLUMNS)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Keep only the rows for the canvas fillText symbol and persist the result\n", "# so the cells below reuse it instead of re-reading the dataset.\n", "fillText = df[df.symbol == 'CanvasRenderingContext2D.fillText'].persist()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Text-mode progress bar (notebook=False) for the persist started above.\n", "progress(fillText, notebook=False)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>argument_0</th>\n", " <th>symbol</th>\n", " <th>operation</th>\n", " </tr>\n", " <tr>\n", " <th>call_id</th>\n", " 
<th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__125</th>\n", " <td>Soft Ruddy Foothold 2</td>\n", " <td>CanvasRenderingContext2D.fillText</td>\n", " <td>call</td>\n", " </tr>\n", " <tr>\n", " <th>1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__132</th>\n", " <td>!H71JCaj)]# 1@#</td>\n", " <td>CanvasRenderingContext2D.fillText</td>\n", " <td>call</td>\n", " </tr>\n", " <tr>\n", " <th>1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__188</th>\n", " <td><@nv45. F1n63r,Pr1n71n6!</td>\n", " <td>CanvasRenderingContext2D.fillText</td>\n", " <td>call</td>\n", " </tr>\n", " <tr>\n", " <th>1_0001213aecc8140d73918b7fcd11af181a850ce5b7d258f82771a4b3.json__197</th>\n", " <td>668</td>\n", " <td>CanvasRenderingContext2D.fillText</td>\n", " <td>call</td>\n", " </tr>\n", " <tr>\n", " <th>1_00021485d883465dc356bceabf4203dec5012044c643ab3498da2d1c.json__30</th>\n", " <td>Soft Ruddy Foothold 2</td>\n", " <td>CanvasRenderingContext2D.fillText</td>\n", " <td>call</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " argument_0 \\\n", "call_id \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... Soft Ruddy Foothold 2 \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... !H71JCaj)]# 1@# \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... <@nv45. F1n63r,Pr1n71n6! \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... 668 \n", "1_00021485d883465dc356bceabf4203dec5012044c643a... Soft Ruddy Foothold 2 \n", "\n", " symbol \\\n", "call_id \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... CanvasRenderingContext2D.fillText \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... CanvasRenderingContext2D.fillText \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... CanvasRenderingContext2D.fillText \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... 
CanvasRenderingContext2D.fillText \n", "1_00021485d883465dc356bceabf4203dec5012044c643a... CanvasRenderingContext2D.fillText \n", "\n", " operation \n", "call_id \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... call \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... call \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... call \n", "1_0001213aecc8140d73918b7fcd11af181a850ce5b7d25... call \n", "1_00021485d883465dc356bceabf4203dec5012044c643a... call " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Peek at the first rows of the filtered frame.\n", "fillText.head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[########################################] | 100% Completed | 8.6s" ] } ], "source": [ "# Tally how often each first argument was passed to fillText.\n", "_arg_counts = fillText['argument_0'].value_counts().persist()\n", "# Text-mode progress bar (notebook=False) for the tally.\n", "progress(_arg_counts, notebook=False)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>argument_0</th>\n", " <th>count</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>π</td>\n", " <td>37327</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Cwm fjordbank glyphs vext quiz, π</td>\n", " <td>21436</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td></td>\n", " <td>14313</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>πΊπ³</td>\n", " <td>12062</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>π§ββοΈ</td>\n", " <td>10422</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>π§ββοΈ</td>\n", " <td>10422</td>\n", " 
</tr>\n", " <tr>\n", " <th>6</th>\n", " <td>πΊβπ³</td>\n", " <td>10422</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>45</td>\n", " <td>8637</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>38</td>\n", " <td>8340</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>!H71JCaj)]# 1@#</td>\n", " <td>8149</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>Soft Ruddy Foothold 2</td>\n", " <td>8149</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>!image!</td>\n", " <td>7301</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>e</td>\n", " <td>6045</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>201708</td>\n", " <td>5785</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>201706</td>\n", " <td>5785</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>201704</td>\n", " <td>5785</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>0</td>\n", " <td>5550</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>a</td>\n", " <td>4787</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>i</td>\n", " <td>4471</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>n</td>\n", " <td>4194</td>\n", " </tr>\n", " <tr>\n", " <th>20</th>\n", " <td>o</td>\n", " <td>4096</td>\n", " </tr>\n", " <tr>\n", " <th>21</th>\n", " <td>t</td>\n", " <td>3974</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>http://valve.github.io</td>\n", " <td>3912</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>r</td>\n", " <td>3621</td>\n", " </tr>\n", " <tr>\n", " <th>24</th>\n", " <td>s</td>\n", " <td>3447</td>\n", " </tr>\n", " <tr>\n", " <th>25</th>\n", " <td><@nv45. 
F1n63r,Pr1n71n6!</td>\n", " <td>3347</td>\n", " </tr>\n", " <tr>\n", " <th>26</th>\n", " <td>π¨</td>\n", " <td>3315</td>\n", " </tr>\n", " <tr>\n", " <th>27</th>\n", " <td>10</td>\n", " <td>3289</td>\n", " </tr>\n", " <tr>\n", " <th>28</th>\n", " <td>11</td>\n", " <td>3230</td>\n", " </tr>\n", " <tr>\n", " <th>29</th>\n", " <td>ζδ»½</td>\n", " <td>3087</td>\n", " </tr>\n", " <tr>\n", " <th>30</th>\n", " <td>06</td>\n", " <td>2799</td>\n", " </tr>\n", " <tr>\n", " <th>31</th>\n", " <td>07</td>\n", " <td>2794</td>\n", " </tr>\n", " <tr>\n", " <th>32</th>\n", " <td>08</td>\n", " <td>2785</td>\n", " </tr>\n", " <tr>\n", " <th>33</th>\n", " <td>09</td>\n", " <td>2782</td>\n", " </tr>\n", " <tr>\n", " <th>34</th>\n", " <td>2,000</td>\n", " <td>2706</td>\n", " </tr>\n", " <tr>\n", " <th>41</th>\n", " <td>$201000</td>\n", " <td>2697</td>\n", " </tr>\n", " <tr>\n", " <th>39</th>\n", " <td>$197000</td>\n", " <td>2697</td>\n", " </tr>\n", " <tr>\n", " <th>40</th>\n", " <td>$196000</td>\n", " <td>2697</td>\n", " </tr>\n", " <tr>\n", " <th>36</th>\n", " <td>$200000</td>\n", " <td>2697</td>\n", " </tr>\n", " <tr>\n", " <th>38</th>\n", " <td>$198000</td>\n", " <td>2697</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " argument_0 count\n", "0 π 37327\n", "1 Cwm fjordbank glyphs vext quiz, π 21436\n", "2 14313\n", "3 πΊπ³ 12062\n", "4 π§ββοΈ 10422\n", "5 π§ββοΈ 10422\n", "6 πΊβπ³ 10422\n", "7 45 8637\n", "8 38 8340\n", "9 !H71JCaj)]# 1@# 8149\n", "10 Soft Ruddy Foothold 2 8149\n", "11 !image! 7301\n", "12 e 6045\n", "13 201708 5785\n", "14 201706 5785\n", "15 201704 5785\n", "16 0 5550\n", "17 a 4787\n", "18 i 4471\n", "19 n 4194\n", "20 o 4096\n", "21 t 3974\n", "22 http://valve.github.io 3912\n", "23 r 3621\n", "24 s 3447\n", "25 <@nv45. F1n63r,Pr1n71n6! 
3347\n", "26 π¨ 3315\n", "27 10 3289\n", "28 11 3230\n", "29 ζδ»½ 3087\n", "30 06 2799\n", "31 07 2794\n", "32 08 2785\n", "33 09 2782\n", "34 2,000 2706\n", "41 $201000 2697\n", "39 $197000 2697\n", "40 $196000 2697\n", "36 $200000 2697\n", "38 $198000 2697" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Turn the value_counts() Series into a two-column frame, most frequent first.\n", "# The axis and value column are named explicitly because value_counts() labels\n", "# its result differently across pandas versions; renaming the default\n", "# 'index' / 'argument_0' columns breaks on pandas >= 2.0.\n", "arg_counts = (\n", "    _arg_counts.compute()\n", "    .rename_axis('argument_0')\n", "    .reset_index(name='count')\n", "    .sort_values('count', ascending=False)\n", ")\n", "arg_counts.head(40)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[########################################] | 100% Completed | 8.5s" ] } ], "source": [ "# Tally the operation values recorded for the fillText rows.\n", "_operation_counts = fillText.operation.value_counts().persist()\n", "progress(_operation_counts, notebook=False)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "call 542838\n", "set 58\n", "set (failed) 0\n", "get 0\n", "Name: operation, dtype: int64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pull the small result back into local memory as a pandas Series.\n", "operation_counts = _operation_counts.compute()\n", "operation_counts" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }