{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " Loading BokehJS ...\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "\n", "(function(root) {\n", " function now() {\n", " return new Date();\n", " }\n", "\n", " var force = true;\n", "\n", " if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n", " root._bokeh_onload_callbacks = [];\n", " root._bokeh_is_loading = undefined;\n", " }\n", "\n", " var JS_MIME_TYPE = 'application/javascript';\n", " var HTML_MIME_TYPE = 'text/html';\n", " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", " var CLASS_NAME = 'output_bokeh rendered_html';\n", "\n", " /**\n", " * Render data to the DOM node\n", " */\n", " function render(props, node) {\n", " var script = document.createElement(\"script\");\n", " node.appendChild(script);\n", " }\n", "\n", " /**\n", " * Handle when an output is cleared or removed\n", " */\n", " function handleClearOutput(event, handle) {\n", " var cell = handle.cell;\n", "\n", " var id = cell.output_area._bokeh_element_id;\n", " var server_id = cell.output_area._bokeh_server_id;\n", " // Clean up Bokeh references\n", " if (id !== undefined) {\n", " Bokeh.index[id].model.document.clear();\n", " delete Bokeh.index[id];\n", " }\n", "\n", " if (server_id !== undefined) {\n", " // Clean up Bokeh references\n", " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", " cell.notebook.kernel.execute(cmd, {\n", " iopub: {\n", " output: function(msg) {\n", " var element_id = msg.content.text.trim();\n", " Bokeh.index[element_id].model.document.clear();\n", " delete Bokeh.index[element_id];\n", " }\n", " }\n", " });\n", " // Destroy server and session\n", " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", " cell.notebook.kernel.execute(cmd);\n", " }\n", " }\n", "\n", " /**\n", " * Handle when a new output is added\n", " */\n", " function handleAddOutput(event, 
handle) {\n", " var output_area = handle.output_area;\n", " var output = handle.output;\n", "\n", " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", " return\n", " }\n", "\n", " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", "\n", " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", " toinsert[0].firstChild.textContent = output.data[JS_MIME_TYPE];\n", " // store reference to embed id on output_area\n", " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", " }\n", " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", " var bk_div = document.createElement(\"div\");\n", " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", " var script_attrs = bk_div.children[0].attributes;\n", " for (var i = 0; i < script_attrs.length; i++) {\n", " toinsert[0].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", " }\n", " // store reference to server id on output_area\n", " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", " }\n", " }\n", "\n", " function register_renderer(events, OutputArea) {\n", "\n", " function append_mime(data, metadata, element) {\n", " // create a DOM node to render to\n", " var toinsert = this.create_output_subarea(\n", " metadata,\n", " CLASS_NAME,\n", " EXEC_MIME_TYPE\n", " );\n", " this.keyboard_manager.register_events(toinsert);\n", " // Render to node\n", " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", " render(props, toinsert[0]);\n", " element.append(toinsert);\n", " return toinsert\n", " }\n", "\n", " /* Handle when an output is cleared or removed */\n", " events.on('clear_output.CodeCell', handleClearOutput);\n", " events.on('delete.Cell', handleClearOutput);\n", "\n", " /* Handle when a new output is added */\n", " events.on('output_added.OutputArea', 
handleAddOutput);\n", "\n", " /**\n", " * Register the mime type and append_mime function with output_area\n", " */\n", " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", " /* Is output safe? */\n", " safe: true,\n", " /* Index of renderer in `output_area.display_order` */\n", " index: 0\n", " });\n", " }\n", "\n", " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", " if (root.Jupyter !== undefined) {\n", " var events = require('base/js/events');\n", " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", "\n", " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", " register_renderer(events, OutputArea);\n", " }\n", " }\n", "\n", " \n", " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", " root._bokeh_timeout = Date.now() + 5000;\n", " root._bokeh_failed_load = false;\n", " }\n", "\n", " var NB_LOAD_WARNING = {'data': {'text/html':\n", " \"
\\n\"+\n", " \"

\\n\"+\n", " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", " \"

\\n\"+\n", " \"\\n\"+\n", " \"\\n\"+\n", " \"from bokeh.resources import INLINE\\n\"+\n", " \"output_notebook(resources=INLINE)\\n\"+\n", " \"\\n\"+\n", " \"
\"}};\n", "\n", " function display_loaded() {\n", " var el = document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\");\n", " if (el != null) {\n", " el.textContent = \"BokehJS is loading...\";\n", " }\n", " if (root.Bokeh !== undefined) {\n", " if (el != null) {\n", " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", " }\n", " } else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(display_loaded, 100)\n", " }\n", " }\n", "\n", "\n", " function run_callbacks() {\n", " try {\n", " root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n", " }\n", " finally {\n", " delete root._bokeh_onload_callbacks\n", " }\n", " console.info(\"Bokeh: all callbacks have finished\");\n", " }\n", "\n", " function load_libs(js_urls, callback) {\n", " root._bokeh_onload_callbacks.push(callback);\n", " if (root._bokeh_is_loading > 0) {\n", " console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", " return null;\n", " }\n", " if (js_urls == null || js_urls.length === 0) {\n", " run_callbacks();\n", " return null;\n", " }\n", " console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", " root._bokeh_is_loading = js_urls.length;\n", " for (var i = 0; i < js_urls.length; i++) {\n", " var url = js_urls[i];\n", " var s = document.createElement('script');\n", " s.src = url;\n", " s.async = false;\n", " s.onreadystatechange = s.onload = function() {\n", " root._bokeh_is_loading--;\n", " if (root._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: all BokehJS libraries loaded\");\n", " run_callbacks()\n", " }\n", " };\n", " s.onerror = function() {\n", " console.warn(\"failed to load library \" + url);\n", " };\n", " console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", " document.getElementsByTagName(\"head\")[0].appendChild(s);\n", " }\n", " };var element = document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\");\n", " if (element == 
null) {\n", " console.log(\"Bokeh: ERROR: autoload.js configured with elementid '4d60d675-a052-4534-8f26-36558ae35110' but no matching script tag was found. \")\n", " return false;\n", " }\n", "\n", " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.13.min.js\"];\n", "\n", " var inline_js = [\n", " function(Bokeh) {\n", " Bokeh.set_log_level(\"info\");\n", " },\n", " \n", " function(Bokeh) {\n", " \n", " },\n", " function(Bokeh) {\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.css\");\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.css\");\n", " console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.css\");\n", " Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.css\");\n", " }\n", " ];\n", "\n", " function run_inline_js() {\n", " \n", " if ((root.Bokeh !== undefined) || (force === true)) {\n", " for (var i = 0; i < inline_js.length; i++) {\n", " inline_js[i].call(root, root.Bokeh);\n", " }if (force === true) {\n", " display_loaded();\n", " }} else if (Date.now() < root._bokeh_timeout) {\n", " setTimeout(run_inline_js, 100);\n", " } else if (!root._bokeh_failed_load) {\n", " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", " root._bokeh_failed_load = true;\n", " } else if (force !== true) {\n", " var cell = $(document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\")).parents('.cell').data().cell;\n", " 
cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", " }\n", "\n", " }\n", "\n", " if (root._bokeh_is_loading === 0) {\n", " console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", " run_inline_js();\n", " } else {\n", " load_libs(js_urls, function() {\n", " console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n", " run_inline_js();\n", " });\n", " }\n", "}(window));" ], "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\");\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n }\n finally {\n delete root._bokeh_onload_callbacks\n }\n console.info(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(js_urls, callback) {\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = js_urls.length;\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var s = document.createElement('script');\n s.src = url;\n s.async = false;\n s.onreadystatechange = s.onload = function() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.log(\"Bokeh: all BokehJS libraries loaded\");\n run_callbacks()\n }\n };\n s.onerror = function() {\n console.warn(\"failed to load library \" + url);\n };\n console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.getElementsByTagName(\"head\")[0].appendChild(s);\n }\n };var element = document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\");\n if (element == null) {\n console.log(\"Bokeh: ERROR: autoload.js configured with elementid '4d60d675-a052-4534-8f26-36558ae35110' but no matching script tag was found. 
\")\n return false;\n }\n\n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.13.min.js\"];\n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n \n function(Bokeh) {\n \n },\n function(Bokeh) {\n console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.css\");\n Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.css\");\n console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.css\");\n Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.css\");\n console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.css\");\n Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.css\");\n }\n ];\n\n function run_inline_js() {\n \n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }if (force === true) {\n display_loaded();\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\")).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(js_urls, function() {\n console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n 
}\n}(window));" }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import ast\n", "import time\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import requests\n", "import scattertext as st\n", "import spacy\n", "from bokeh.io import show, output_notebook\n", "from bokeh.models import ColumnDataSource, ranges, LabelSet\n", "from bokeh.palettes import PuBu\n", "from bokeh.plotting import figure\n", "from IPython.display import IFrame, display, HTML\n", "\n", "output_notebook()\n", "display(HTML(\"\"))\n", "%matplotlib inline" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "assert [int(x) for x in st.__version__.split('.')] >= [0, 0, 2, 20]" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Download the list of ICLR 2018 blind submissions (one note per paper).\n", "url = 'https://openreview.net/notes?invitation=ICLR.cc%2F2018%2FConference%2F-%2FBlind_Submission&offset=0&limit=1000'\n", "response = requests.get(url, timeout=60)\n", "response.raise_for_status()  # fail here rather than with a confusing KeyError on 'notes'\n", "df = pd.DataFrame(response.json()['notes'])" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# Fetch every note attached to each submission's forum and cache the result on disk.\n", "forum_content = []\n", "for i, forum_id in enumerate(df.forum):\n", "    notes_url = 'https://openreview.net/notes?forum={}&trash=true'.format(forum_id)\n", "    try:\n", "        forum_content.append(requests.get(notes_url, timeout=60).json())\n", "    except (requests.RequestException, ValueError) as err:\n", "        # Append a placeholder (never rebind the list) so forum_content stays\n", "        # aligned one-to-one with the rows of df.\n", "        print('err', i, forum_id, repr(err))\n", "        forum_content.append({'notes': []})\n", "    time.sleep(.3)  # pause between requests\n", "df['forumContent'] = pd.Series(forum_content, index=df.index)\n", "\n", "df.to_csv('iclr2018_raw.csv.bz2', index=False, compression='bz2')" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Reload the cached scrape from disk.\n", "read_local = True\n", "if read_local:\n", "    df = pd.read_csv('iclr2018_raw.csv.bz2')\n", "    # The CSV holds the nested dicts as Python literals; ast.literal_eval parses\n", "    # them without executing arbitrary code, unlike eval.\n", "    df['forumContent'] = df.forumContent.apply(ast.literal_eval)\n", "    df['content'] = df.content.apply(ast.literal_eval)" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": 
{}, "outputs": [ { "data": { "text/plain": [ "Reject 504\n", "Accept (Poster) 313\n", "Invite to Workshop Track 90\n", "Accept (Oral) 23\n", "Name: decision_raw, dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def first_decision(forum):\n", "    \"\"\"Return the first 'decision' value found among a forum's notes, or None if there is none.\"\"\"\n", "    return next((note['content']['decision']\n", "                 for note in forum.get('notes', [])\n", "                 if 'decision' in note.get('content', {})), None)\n", "\n", "df['decision_raw'] = df.forumContent.apply(first_decision)\n", "df['decision_raw'].value_counts()" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "930" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(df)" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def simplify_decision(raw):\n", "    \"\"\"Collapse a raw decision into 'Reject', 'Accept' or 'Workshop' ('Unknown' if missing).\"\"\"\n", "    if not isinstance(raw, str):\n", "        return 'Unknown'\n", "    if raw == 'Reject':\n", "        return 'Reject'\n", "    return 'Accept' if raw.startswith('Accept') else 'Workshop'\n", "\n", "\n", "def bin_rating(rating):\n", "    \"\"\"Bin a rating string by the integer before its first ':' (<5 Negative, >6 Positive, else Neutral).\"\"\"\n", "    score = int(rating.split(':')[0].strip())\n", "    if score < 5:\n", "        return 'Negative'\n", "    return 'Positive' if score > 6 else 'Neutral'\n", "\n", "\n", "df['title'] = df.content.apply(lambda x: x['title'])\n", "df['authors'] = df.content.apply(lambda x: x['authors'])\n", "\n", "# One record per review note; notes without a 'review' field are skipped.\n", "review_records = [\n", "    {'review': note['content']['review'],\n", "     'rating': note['content']['rating'],\n", "     'confidence': note['content']['confidence'],\n", "     'forum': note['forum']}\n", "    for forum in df.forumContent\n", "    for note in forum.get('notes', [])\n", "    if 'review' in note.get('content', {})\n", "]\n", "# Explicit columns keep the merge key present even when no reviews were found.\n", "only_reviews_df = pd.DataFrame(review_records,\n", "                               columns=['review', 'rating', 'confidence', 'forum'])\n", "reviews_df = pd.merge(df[['title', 'authors', 'decision_raw', 'forum']], only_reviews_df, on='forum')\n", "reviews_df['decision'] = reviews_df['decision_raw'].apply(simplify_decision)\n", "reviews_df['rating_bin'] = reviews_df['rating'].apply(bin_rating)\n", "reviews_df['category'] = reviews_df['decision'] + ', ' + reviews_df['rating_bin']" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", 
"execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "(function(root) {\n", " function embed_document(root) {\n", " \n", " var docs_json = {\"04c8c15c-980f-4431-b62b-a6dcc72ef801\":{\"roots\":{\"references\":[{\"attributes\":{\"axis_label\":\"Paper Count\",\"formatter\":{\"id\":\"3c397ccd-8924-451a-a3dc-9af7e9230903\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"86920c3e-d9f3-4110-b1cf-874d7f3c4af3\",\"type\":\"BasicTicker\"}},\"id\":\"586cf370-3e20-48cd-9274-225a237cb4cc\",\"type\":\"LinearAxis\"},{\"attributes\":{\"plot\":{\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"a0d191bb-0042-407e-82e3-6bc2458b8158\",\"type\":\"CategoricalTicker\"}},\"id\":\"cf6b767a-0362-4d8a-adc0-415ab6e34ae8\",\"type\":\"Grid\"},{\"attributes\":{\"source\":{\"id\":\"0ec96012-280e-40e6-b18f-9433ffa3d58e\",\"type\":\"ColumnDataSource\"}},\"id\":\"1afc5c41-ab48-4f9d-aa62-8c66034500c6\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"3c397ccd-8924-451a-a3dc-9af7e9230903\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"a0d191bb-0042-407e-82e3-6bc2458b8158\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"44bbb868-46d5-46bd-bd88-c8774d33fa9b\",\"type\":\"SaveTool\"},{\"attributes\":{\"level\":\"glyph\",\"plot\":{\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"source\":{\"id\":\"0ec96012-280e-40e6-b18f-9433ffa3d58e\",\"type\":\"ColumnDataSource\"},\"text\":{\"field\":\"y\"},\"x\":{\"field\":\"x\"},\"x_offset\":{\"value\":-13.5},\"y\":{\"field\":\"y\"}},\"id\":\"66b3bebd-5172-4f50-9042-499885e6d33b\",\"type\":\"LabelSet\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"top\":{\"field\":\"y\"},\"width\":
{\"value\":0.3},\"x\":{\"field\":\"x\"}},\"id\":\"a1223a80-602a-467a-8756-7da9f4fa7dbf\",\"type\":\"VBar\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"86920c3e-d9f3-4110-b1cf-874d7f3c4af3\",\"type\":\"BasicTicker\"}},\"id\":\"5d732d58-a986-4351-84f4-9405c08c0d15\",\"type\":\"Grid\"},{\"attributes\":{\"data_source\":{\"id\":\"0ec96012-280e-40e6-b18f-9433ffa3d58e\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"4907ffff-249f-455f-8a8c-646f68c597b0\",\"type\":\"VBar\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"a1223a80-602a-467a-8756-7da9f4fa7dbf\",\"type\":\"VBar\"},\"selection_glyph\":null,\"view\":{\"id\":\"1afc5c41-ab48-4f9d-aa62-8c66034500c6\",\"type\":\"CDSView\"}},\"id\":\"8a0bf3e0-ef0a-4326-986c-7dcdf781867f\",\"type\":\"GlyphRenderer\"},{\"attributes\":{},\"id\":\"86920c3e-d9f3-4110-b1cf-874d7f3c4af3\",\"type\":\"BasicTicker\"},{\"attributes\":{},\"id\":\"cf2a9102-8691-4eec-a72c-a978e9f6e1a7\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"plot\":null,\"text\":\"\"},\"id\":\"06197a5f-0441-4cc1-9957-b9ba00feb06b\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null,\"column_names\":[\"y\",\"x\"],\"data\":{\"x\":[\"Reject\",\"Accept (Poster)\",\"Invite to Workshop Track\",\"Accept 
(Oral)\"],\"y\":[503,313,90,23]}},\"id\":\"0ec96012-280e-40e6-b18f-9433ffa3d58e\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"44bbb868-46d5-46bd-bd88-c8774d33fa9b\",\"type\":\"SaveTool\"}]},\"id\":\"37ae1413-538f-4b21-b190-439ba28d7543\",\"type\":\"Toolbar\"},{\"attributes\":{\"below\":[{\"id\":\"02ccde59-350b-4d45-94d0-ca67ca1b40f6\",\"type\":\"CategoricalAxis\"}],\"left\":[{\"id\":\"586cf370-3e20-48cd-9274-225a237cb4cc\",\"type\":\"LinearAxis\"}],\"plot_height\":300,\"renderers\":[{\"id\":\"02ccde59-350b-4d45-94d0-ca67ca1b40f6\",\"type\":\"CategoricalAxis\"},{\"id\":\"cf6b767a-0362-4d8a-adc0-415ab6e34ae8\",\"type\":\"Grid\"},{\"id\":\"586cf370-3e20-48cd-9274-225a237cb4cc\",\"type\":\"LinearAxis\"},{\"id\":\"5d732d58-a986-4351-84f4-9405c08c0d15\",\"type\":\"Grid\"},{\"id\":\"8a0bf3e0-ef0a-4326-986c-7dcdf781867f\",\"type\":\"GlyphRenderer\"},{\"id\":\"66b3bebd-5172-4f50-9042-499885e6d33b\",\"type\":\"LabelSet\"}],\"title\":{\"id\":\"06197a5f-0441-4cc1-9957-b9ba00feb06b\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"37ae1413-538f-4b21-b190-439ba28d7543\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"bb66180b-3575-4b26-bd58-94a327b11ee0\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"cf2a9102-8691-4eec-a72c-a978e9f6e1a7\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"0fb28cfb-111c-41f1-8d18-411fde8c005c\",\"type\":\"Range1d\"},\"y_scale\":{\"id\":\"dbaaec8b-fb9b-4ce6-bfaa-d4a08c80fc8e\",\"type\":\"LinearScale\"}},\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"fill_color\":{\"value\":\"#3690c0\"},\"line_color\":{\"value\":\"#3690c0\"},\"top\":{\"field\":\"y\"},\"width\":{\"value\":0.3},\"x\":{\"field\":\"x\"}},\"id\":\"4907ffff-249f-455f-8a8c-646f68c597b0\",\"type\":\"VBar\"},{\"attributes\":{},\"id\":\"dbaaec8b-fb9b-4ce6-bfaa-d4a08c80fc8e\",\"type\":\"LinearScale\"},{\"attribu
tes\":{\"callback\":null,\"end\":600},\"id\":\"0fb28cfb-111c-41f1-8d18-411fde8c005c\",\"type\":\"Range1d\"},{\"attributes\":{},\"id\":\"0258166d-9faf-4814-8706-9b336777b00a\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"callback\":null,\"factors\":[\"Reject\",\"Accept (Poster)\",\"Invite to Workshop Track\",\"Accept (Oral)\"]},\"id\":\"bb66180b-3575-4b26-bd58-94a327b11ee0\",\"type\":\"FactorRange\"},{\"attributes\":{\"axis_label\":\"Decision\",\"formatter\":{\"id\":\"0258166d-9faf-4814-8706-9b336777b00a\",\"type\":\"CategoricalTickFormatter\"},\"plot\":{\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"a0d191bb-0042-407e-82e3-6bc2458b8158\",\"type\":\"CategoricalTicker\"}},\"id\":\"02ccde59-350b-4d45-94d0-ca67ca1b40f6\",\"type\":\"CategoricalAxis\"}],\"root_ids\":[\"e08dad28-652d-461f-8edd-1e4bd2a3c114\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.13\"}};\n", " var render_items = [{\"docid\":\"04c8c15c-980f-4431-b62b-a6dcc72ef801\",\"elementid\":\"a7ac1fc1-d8dc-4943-8713-8eaa635969ee\",\"modelid\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\"}];\n", " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", "\n", " }\n", " if (root.Bokeh !== undefined) {\n", " embed_document(root);\n", " } else {\n", " var attempts = 0;\n", " var timer = setInterval(function(root) {\n", " if (root.Bokeh !== undefined) {\n", " embed_document(root);\n", " clearInterval(timer);\n", " }\n", " attempts++;\n", " if (attempts > 100) {\n", " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n", " clearInterval(timer);\n", " }\n", " }, 10, root)\n", " }\n", "})(window);" ], "application/vnd.bokehjs_exec.v0+json": "" }, "metadata": { "application/vnd.bokehjs_exec.v0+json": { "id": "e08dad28-652d-461f-8edd-1e4bd2a3c114" } }, "output_type": "display_data" } ], "source": [ "decisions = 
reviews_df[['forum','decision_raw']].drop_duplicates()['decision_raw'].value_counts()\n", "source = ColumnDataSource(dict(x=list(decisions.index), y=decisions.values))\n", "\n", "# Leave headroom above the tallest bar so its count label is not clipped.\n", "y_max = int(decisions.max() * 1.2)\n", "\n", "plot = figure(plot_width=600, plot_height=300, tools=\"save\",\n", "              x_axis_label=\"Decision\",\n", "              y_axis_label=\"Paper Count\",\n", "              title=\"\",\n", "              x_minor_ticks=2,\n", "              x_range=source.data[\"x\"],\n", "              y_range=ranges.Range1d(start=0, end=y_max))\n", "\n", "# Print each bar's count just above it.\n", "labels = LabelSet(x='x', y='y', text='y', level='glyph',\n", "                  x_offset=-13.5, y_offset=0, source=source, render_mode='canvas')\n", "\n", "plot.vbar(source=source, x='x', top='y', bottom=0, width=0.3, color=PuBu[7][2])\n", "plot.add_layout(labels)\n", "show(plot)" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/kesslej/anaconda3/lib/python3.5/site-packages/bokeh/core/json_encoder.py:80: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n", " elif np.issubdtype(type(obj), np.float):\n" ] }, { "data": { "text/html": [ "\n", "
\n", "
\n", "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/javascript": [ "(function(root) {\n", " function embed_document(root) {\n", " \n", " var docs_json = {\"3992f4c1-1c27-41f0-95d0-459441571f79\":{\"roots\":{\"references\":[{\"attributes\":{\"source\":{\"id\":\"3d18162c-6726-4ca0-9a04-66b34e6acc93\",\"type\":\"ColumnDataSource\"}},\"id\":\"cbf4d423-e941-4de1-b1e8-7d5c14dbc49f\",\"type\":\"CDSView\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"7ca8c5c4-5132-4a72-9349-5ff57e5478eb\",\"type\":\"SaveTool\"}]},\"id\":\"04c36a4c-7afb-4b07-afd0-d938f933a67f\",\"type\":\"Toolbar\"},{\"attributes\":{\"fill_color\":{\"value\":\"#3690c0\"},\"line_color\":{\"value\":\"#3690c0\"},\"top\":{\"field\":\"y\"},\"width\":{\"value\":0.3},\"x\":{\"field\":\"x\"}},\"id\":\"7e9e282f-9711-49c8-aff6-8a547f42590c\",\"type\":\"VBar\"},{\"attributes\":{\"callback\":null,\"factors\":[\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\",\"9\",\"10\"]},\"id\":\"2d77e4a6-b55e-45ec-9b05-57756cae6a52\",\"type\":\"FactorRange\"},{\"attributes\":{\"data_source\":{\"id\":\"3d18162c-6726-4ca0-9a04-66b34e6acc93\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"7e9e282f-9711-49c8-aff6-8a547f42590c\",\"type\":\"VBar\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"718454da-4d02-44ec-91cf-e45407ab3171\",\"type\":\"VBar\"},\"selection_glyph\":null,\"view\":{\"id\":\"cbf4d423-e941-4de1-b1e8-7d5c14dbc49f\",\"type\":\"CDSView\"}},\"id\":\"b6254c76-91a8-41c0-9c11-7f125b45f4cd\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"axis_label\":\"Rating\",\"formatter\":{\"id\":\"ff7aec16-5f93-40dc-a810-1b0afd4d23cc\",\"type\":\"CategoricalTickFormatter\"},\"plot\":{\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"05a341e8-b9a6-411f-8200-16fcdd1ed9db\",\"type\":\"CategoricalTicker\"}},\"id\":\"cc64e842-04e8-4d
3c-9a9e-ddb01066e387\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"4c20e64f-7e69-4552-a33d-a8d4744fe3cb\",\"type\":\"LinearScale\"},{\"attributes\":{},\"id\":\"7ca8c5c4-5132-4a72-9349-5ff57e5478eb\",\"type\":\"SaveTool\"},{\"attributes\":{},\"id\":\"ce818dfa-c79d-4a08-aa91-f035ed3cacee\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"ff7aec16-5f93-40dc-a810-1b0afd4d23cc\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"6fe4df45-f5b6-4457-a258-7612e69f3e73\",\"type\":\"BasicTicker\"}},\"id\":\"5d0affcf-a6c3-4b09-8644-b9dcc44c91b0\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"6fe4df45-f5b6-4457-a258-7612e69f3e73\",\"type\":\"BasicTicker\"},{\"attributes\":{\"callback\":null,\"column_names\":[\"y\",\"x\"],\"data\":{\"x\":[\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\",\"9\",\"10\"],\"y\":[5,63,234,557,576,626,526,170,47,2]}},\"id\":\"3d18162c-6726-4ca0-9a04-66b34e6acc93\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"plot\":{\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"05a341e8-b9a6-411f-8200-16fcdd1ed9db\",\"type\":\"CategoricalTicker\"}},\"id\":\"cf5ed539-ae32-4ec8-9156-5cb47e9d3dfd\",\"type\":\"Grid\"},{\"attributes\":{\"callback\":null,\"end\":726},\"id\":\"58b3d85b-ba67-4d16-881f-316baa7349dc\",\"type\":\"Range1d\"},{\"attributes\":{},\"id\":\"6498ef77-52c7-4804-8e6e-a271b63c34c9\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"axis_label\":\"Review 
Count\",\"formatter\":{\"id\":\"ce818dfa-c79d-4a08-aa91-f035ed3cacee\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"6fe4df45-f5b6-4457-a258-7612e69f3e73\",\"type\":\"BasicTicker\"}},\"id\":\"0d11ad06-89cd-4e26-a278-3ea5cb624918\",\"type\":\"LinearAxis\"},{\"attributes\":{\"below\":[{\"id\":\"cc64e842-04e8-4d3c-9a9e-ddb01066e387\",\"type\":\"CategoricalAxis\"}],\"left\":[{\"id\":\"0d11ad06-89cd-4e26-a278-3ea5cb624918\",\"type\":\"LinearAxis\"}],\"plot_height\":300,\"renderers\":[{\"id\":\"cc64e842-04e8-4d3c-9a9e-ddb01066e387\",\"type\":\"CategoricalAxis\"},{\"id\":\"cf5ed539-ae32-4ec8-9156-5cb47e9d3dfd\",\"type\":\"Grid\"},{\"id\":\"0d11ad06-89cd-4e26-a278-3ea5cb624918\",\"type\":\"LinearAxis\"},{\"id\":\"5d0affcf-a6c3-4b09-8644-b9dcc44c91b0\",\"type\":\"Grid\"},{\"id\":\"b6254c76-91a8-41c0-9c11-7f125b45f4cd\",\"type\":\"GlyphRenderer\"},{\"id\":\"caadb636-d409-48ec-aca2-7fa38fb1db68\",\"type\":\"LabelSet\"}],\"title\":{\"id\":\"49435357-01d4-4cf1-8ead-7fb214572b45\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"04c36a4c-7afb-4b07-afd0-d938f933a67f\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"2d77e4a6-b55e-45ec-9b05-57756cae6a52\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"6498ef77-52c7-4804-8e6e-a271b63c34c9\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"58b3d85b-ba67-4d16-881f-316baa7349dc\",\"type\":\"Range1d\"},\"y_scale\":{\"id\":\"4c20e64f-7e69-4552-a33d-a8d4744fe3cb\",\"type\":\"LinearScale\"}},\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"plot\":null,\"text\":\"\"},\"id\":\"49435357-01d4-4cf1-8ead-7fb214572b45\",\"type\":\"Title\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"top\":{\"field\":\"y\"},\"width\":{\"value\":0.3},\"x\":{\"field\":\"x\"}},\"id\":\"718454da-4
d02-44ec-91cf-e45407ab3171\",\"type\":\"VBar\"},{\"attributes\":{},\"id\":\"05a341e8-b9a6-411f-8200-16fcdd1ed9db\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"level\":\"glyph\",\"plot\":{\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"source\":{\"id\":\"3d18162c-6726-4ca0-9a04-66b34e6acc93\",\"type\":\"ColumnDataSource\"},\"text\":{\"field\":\"y\"},\"x\":{\"field\":\"x\"},\"x_offset\":{\"value\":-13.5},\"y\":{\"field\":\"y\"}},\"id\":\"caadb636-d409-48ec-aca2-7fa38fb1db68\",\"type\":\"LabelSet\"}],\"root_ids\":[\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.13\"}};\n", " var render_items = [{\"docid\":\"3992f4c1-1c27-41f0-95d0-459441571f79\",\"elementid\":\"5e279a98-1f57-43ac-9e66-3a4931885037\",\"modelid\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\"}];\n", " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", "\n", " }\n", " if (root.Bokeh !== undefined) {\n", " embed_document(root);\n", " } else {\n", " var attempts = 0;\n", " var timer = setInterval(function(root) {\n", " if (root.Bokeh !== undefined) {\n", " embed_document(root);\n", " clearInterval(timer);\n", " }\n", " attempts++;\n", " if (attempts > 100) {\n", " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n", " clearInterval(timer);\n", " }\n", " }, 10, root)\n", " }\n", "})(window);" ], "application/vnd.bokehjs_exec.v0+json": "" }, "metadata": { "application/vnd.bokehjs_exec.v0+json": { "id": "409a6481-03e6-4a1b-a9c7-ff130ed0c75c" } }, "output_type": "display_data" } ], "source": [ "ratings = reviews_df['rating'].value_counts()\n", "ratings.index = [int(c.split(':')[0]) for c in ratings.index]\n", "ratings = ratings.sort_index()\n", "source = ColumnDataSource(dict(x=[str(x) for x in ratings.index],y=ratings.values))\n", "\n", "plot = figure(plot_width=600, plot_height=300, tools=\"save\",\n", " x_axis_label = \"Rating\",\n", " y_axis_label = 
\"Review Count\",\n", " title=\"\",\n", " x_minor_ticks=2,\n", " x_range = source.data[\"x\"],\n", " y_range= ranges.Range1d(start=0,end=ratings.max() + 100))\n", "\n", "\n", "labels = LabelSet(x='x', y='y', text='y', level='glyph',\n", " x_offset=-13.5, y_offset=0, source=source, render_mode='canvas')\n", "\n", "plot.vbar(source=source,x='x',top='y',bottom=0,width=0.3,color=PuBu[7][2])\n", "\n", "plot.add_layout(labels)\n", "show(plot)" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Per-review HTML blurb; later cells pass this column as `metadata` to the Scattertext explorers.\n", "reviews_df['metadata'] = (\n", "    reviews_df['title']\n", "    + '<br>Score: ' + reviews_df['rating'].apply(lambda x: x.split(':')[0]) + '/10'\n", "    + '<br>Confidence: ' + reviews_df['confidence'].apply(lambda x: x.split(':')[0]) + '/5'\n", "    # The decision is a category label, not a number, so it takes no '/10' suffix.\n", "    + '<br>Ultimate decision: ' + reviews_df['decision'].apply(lambda x: x.split(':')[0])\n", ")\n" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "reviews_df.to_csv('iclr2018_reviews.csv.bz2', index=False, compression='bz2')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Start here for NLP" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "nlp = spacy.load('en')\n", "reviews_df['parse'] = reviews_df['review'].apply(nlp)" ] },
{ "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "corpus = (st.CorpusFromParsedDocuments(reviews_df, category_col = 'rating_bin', parsed_col = 'parse')\n", " .build()\n", " .remove_categories(['Neutral']))" ] },
{ "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6131293" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "html = st.produce_scattertext_explorer(corpus, \n", " category='Positive', \n", " not_categories=['Negative'],\n", " transform = st.Scalers.percentile_dense,\n", " term_scorer = st.RankDifference(),\n", " metadata = corpus.get_df()['metadata'])\n", "file_name = '../jasonkessler.github.io/iclr2018reviews/pos_neg_dense.html'\n", "open(file_name, 'wb').write(html.encode('utf-8'))\n", "#IFrame(src=file_name, width = 1500, height=700)" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "four_square_corpus = (st.CorpusFromParsedDocuments(reviews_df, category_col = 'category', parsed_col = 'parse')\n", " .build()\n", " .get_unigram_corpus()\n", " .compact(st.ClassPercentageCompactor(term_count=1)))" ] },
{ "cell_type": "code", "execution_count": 30,
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "8267592" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "file_name = '../jasonkessler.github.io/iclr2018reviews/accept_reject_four_square_axes.html'\n", "open(file_name, 'wb').write(html.encode('utf-8'))\n", "#IFrame(src=file_name, width = 1500, height=700)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8205863" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "four_square_axes = st.FourSquareAxes(four_square_corpus, \n", " left_categories=['Accept, Positive'], \n", " right_categories=['Accept, Negative'], \n", " top_categories=['Reject, Positive'], \n", " bottom_categories=['Reject, Negative'], \n", " labels = {'a': 'Positive',\n", " 'b': 'Review that was Contrary to Accpetance Decision',\n", " 'not_a': 'Negative',\n", " 'not_b': 'Review that in Line With Acceptance Decision'},\n", " term_ranker=st.OncePerDocFrequencyRanker)\n", "html = st.produce_four_square_axes_explorer(\n", " four_square_axes=four_square_axes,\n", " x_label=\"Accepts: Pos-Neg\",\n", " y_label='Rejects: Neg-Pos',\n", " use_full_doc=True,\n", " metadata=four_square_corpus.get_df()['metadata'],\n", " color_func='(function(d) {return d3.rgb(230, 220, 230)})',\n", " censor_points = False,\n", ")\n", "file_name = '../jasonkessler.github.io/iclr2018reviews/accept_reject_four_square_axes_display.html'\n", "open(file_name, 'wb').write(html.encode('utf-8'))\n", "#IFrame(src=file_name, width = 1500, height=700)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8205862" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "four_square_axes = st.FourSquareAxes(four_square_corpus, \n", " left_categories=['Accept, Positive'], \n", " right_categories=['Accept, Negative'], \n", " top_categories=['Reject, 
Positive'], \n", " bottom_categories=['Reject, Negative'], \n", " labels = {'a': 'Positive',\n", " 'b': 'Review that was Contrary to Accpetance Decision',\n", " 'not_a': 'Negative',\n", " 'not_b': 'Review that in Line With Acceptance Decision'},\n", " term_ranker=st.OncePerDocFrequencyRanker)\n", "html = st.produce_four_square_axes_explorer(\n", " four_square_axes=four_square_axes,\n", " x_label=\"Accepts: Pos-Neg\",\n", " y_label='Rejects: Neg-Pos',\n", " use_full_doc=True,\n", " metadata=four_square_corpus.get_df()['metadata'],\n", " color_func='(function(d) {return d3.rgb(230, 220, 230)})',\n", ")\n", "file_name = '../jasonkessler.github.io/iclr2018reviews/accept_reject_four_square_axes_interactive.html'\n", "open(file_name, 'wb').write(html.encode('utf-8'))\n", "#IFrame(src=file_name, width = 1500, height=700)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8206300" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "four_square= st.FourSquare(four_square_corpus, \n", " category_a_list=['Accept, Positive'], \n", " category_b_list=['Accept, Negative'], \n", " not_category_b_list=['Reject, Positive'], \n", " not_category_a_list=['Reject, Negative'], \n", " labels = {'a_and_b': 'Accept',\n", " 'not_a_and_not_b': 'Reject',\n", " 'a_and_not_b': 'Positive',\n", " 'b_and_not_a': 'Negative'},\n", " term_ranker=st.OncePerDocFrequencyRanker)\n", "html = st.produce_four_square_explorer(\n", " four_square=four_square,\n", " y_label='Accept-Reject',\n", " x_label='Positive-Negative',\n", " use_full_doc=True,\n", " metadata = four_square_corpus.get_df()['metadata'],\n", ")\n", "file_name = '../jasonkessler.github.io/iclr2018reviews/accept_reject_four_square.html'\n", "open(file_name, 'wb').write(html.encode('utf-8'))\n", "#IFrame(src=file_name, width = 1500, height=700)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "29.19629192352295\n" ] } ], "source": [ "#corpus = corpus.remove_infrequent_words(5)\n", "t0 = time.time()\n", "compact_corpus = st.CompactTerms(corpus, st.OncePerDocFrequencyRanker, 5).compact()\n", "print(time.time() - t0)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "fine_grain_corpus = (st.CorpusFromParsedDocuments(reviews_df, category_col='category', parsed_col='parse').build())" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Reject, Negative',\n", " 'Reject, Neutral',\n", " 'Accept, Negative',\n", " 'Accept, Positive',\n", " 'Reject, Positive',\n", " 'Workshop, Neutral',\n", " 'Accept, Neutral',\n", " 'Workshop, Negative',\n", " 'Workshop, Positive']" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fine_grain_corpus.get_categories()" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "fine_grain_corpus_compact = st.CompactTerms(fine_grain_corpus, st.OncePerDocFrequencyRanker, 5).compact()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(31640, 307829)" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(fine_grain_corpus_compact.get_terms()), len(fine_grain_corpus.get_terms())" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['case for', 'evaluating', 'closer', 'closer to', 'machines',\n", " 'applications', 'e.g. 
the', 'node', 'doing', 'are of'],\n", " dtype='object', name='term')\n", "Index(['between', 'way', 'only', 'first', '/', 'method', 'given', 'about',\n", " 'to see', 'see'],\n", " dtype='object', name='term')\n" ] } ], "source": [ "# Contrast term usage in positive reviews of accepted vs. rejected papers.\n", "tdf = st.OncePerDocFrequencyRanker(fine_grain_corpus).get_ranks()\n", "ap_vs_rp = st.RankDifference().get_scores(tdf['Accept, Positive freq'], tdf['Reject, Positive freq'])\n", "# Index the scores by term and sort ascending so both extremes can be read off.\n", "# NOTE(review): presumably a higher score means more associated with the first argument -- confirm.\n", "ap_vs_rp_terms = pd.Series(ap_vs_rp, index=tdf.index).sort_values()\n", "print(ap_vs_rp_terms.iloc[:10].index)\n", "print(ap_vs_rp_terms.iloc[-10:].index)" ] },
{ "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Index(['here the', 'observations', 'authors show', 'valuable', 'find that',\n", " 'it ’s', 'from table', 'method which', 'put', 'the process'],\n", " dtype='object', name='term')\n", "Index(['model', 'no', 'for the', 'new', 'neural', 'are not', 'dataset',\n", " 'these', 'about', 'network'],\n", " dtype='object', name='term')\n" ] } ], "source": [ "# Same contrast for negative reviews: accepted vs. rejected papers.\n", "an_vs_rn = st.RankDifference().get_scores(tdf['Accept, Negative freq'], tdf['Reject, Negative freq'])\n", "an_vs_rn_terms = pd.Series(an_vs_rn, index=tdf.index).sort_values()\n", "print(an_vs_rn_terms.iloc[:10].index)\n", "print(an_vs_rn_terms.iloc[-10:].index)" ] },
{ "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [], "source": [ "four_square = st.FourSquare(fine_grain_corpus_compact, \n", " ['Accept, Positive'], \n", " ['Reject, Positive'],\n", " ['Accept, Negative'], \n", " ['Reject, Negative'], \n", " term_ranker=st.OncePerDocFrequencyRanker,\n", " scorer = st.RankDifference())" ] },
{ "cell_type": "code", "execution_count": 74, "metadata": {}, "outputs": [], "source": [ "html = st.produce_four_square_explorer(four_square=four_square,\n", " x_label='Pos-Neg',\n", " y_label='Accept-Reject',\n", " num_terms_semiotic_square=10,\n", " minimum_term_frequency=10,\n", " pmi_threshold_coefficient=10,\n", " term_ranker=st.OncePerDocFrequencyRanker,\n", " metadata=(fine_grain_corpus_compact._df['category'] + ': '\n", " + fine_grain_corpus_compact._df.rating + ', '\n", " + 
fine_grain_corpus_compact._df['title']))\n" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "file_name = 'four_square.html'\n", "open(file_name, 'wb').write(html.encode('utf-8'))\n", "#IFrame(src=file_name, width = 1500, height=700)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "axes = four_square.get_axes()" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
xycounts
term
not well-0.060523-0.06052323
observations-0.056245-0.05624552
case for-0.054141-0.05414114
it ’s-0.053300-0.05330040
doing-0.046216-0.04621653
networks as-0.043832-0.04383215
here the-0.043551-0.04355119
be the-0.042359-0.04235962
from table-0.040466-0.04046611
natural language-0.040466-0.04046613
observed-0.039624-0.03962439
generated data-0.037240-0.0372407
unable to-0.037170-0.0371708
ensemble-0.037170-0.03717012
unclear whether-0.037170-0.0371708
are pretty-0.037170-0.0371708
for improving-0.037029-0.03702914
vanilla-0.036959-0.03695919
are just-0.036889-0.03688918
evaluating-0.036889-0.03688924
besides-0.036819-0.03681923
benefit of-0.036538-0.03653831
properties of-0.036398-0.03639837
to add-0.036328-0.03632842
properties-0.035697-0.03569763
are of-0.033943-0.0339436
machines-0.033803-0.03380312
observations and-0.033803-0.0338038
closer to-0.033803-0.03380312
even the-0.033663-0.03366310
............
tasks0.2019080.201908152
clearly0.2028190.202819192
's0.2031000.203100194
their0.2045730.204573282
interesting0.2056950.205695371
easy to0.2112350.211235118
bit0.2118660.211866143
40.2141810.214181236
network0.2143910.214391265
loss0.2150920.215092137
i am0.2190200.219020150
am0.2191600.219160155
you0.2236480.223648203
further0.2247700.224770117
particular0.2256820.225682141
easy0.2319940.231994130
makes0.2322740.232274140
previous0.2325550.232555153
given0.2376740.237674216
is well0.2392870.239287153
both0.2408300.240830215
way0.2439860.243986197
new0.2448280.244828247
novel0.2470020.247002188
first0.2479840.247984226
well written0.2539450.253945200
/0.2583630.258363232
written0.2794730.279473290
about0.3197980.319798249
to see0.3287750.328775185
\n", "

31640 rows × 3 columns

\n", "
" ], "text/plain": [ " x y counts\n", "term \n", "not well -0.060523 -0.060523 23\n", "observations -0.056245 -0.056245 52\n", "case for -0.054141 -0.054141 14\n", "it ’s -0.053300 -0.053300 40\n", "doing -0.046216 -0.046216 53\n", "networks as -0.043832 -0.043832 15\n", "here the -0.043551 -0.043551 19\n", "be the -0.042359 -0.042359 62\n", "from table -0.040466 -0.040466 11\n", "natural language -0.040466 -0.040466 13\n", "observed -0.039624 -0.039624 39\n", "generated data -0.037240 -0.037240 7\n", "unable to -0.037170 -0.037170 8\n", "ensemble -0.037170 -0.037170 12\n", "unclear whether -0.037170 -0.037170 8\n", "are pretty -0.037170 -0.037170 8\n", "for improving -0.037029 -0.037029 14\n", "vanilla -0.036959 -0.036959 19\n", "are just -0.036889 -0.036889 18\n", "evaluating -0.036889 -0.036889 24\n", "besides -0.036819 -0.036819 23\n", "benefit of -0.036538 -0.036538 31\n", "properties of -0.036398 -0.036398 37\n", "to add -0.036328 -0.036328 42\n", "properties -0.035697 -0.035697 63\n", "are of -0.033943 -0.033943 6\n", "machines -0.033803 -0.033803 12\n", "observations and -0.033803 -0.033803 8\n", "closer to -0.033803 -0.033803 12\n", "even the -0.033663 -0.033663 10\n", "... ... ... 
...\n", "tasks 0.201908 0.201908 152\n", "clearly 0.202819 0.202819 192\n", "'s 0.203100 0.203100 194\n", "their 0.204573 0.204573 282\n", "interesting 0.205695 0.205695 371\n", "easy to 0.211235 0.211235 118\n", "bit 0.211866 0.211866 143\n", "4 0.214181 0.214181 236\n", "network 0.214391 0.214391 265\n", "loss 0.215092 0.215092 137\n", "i am 0.219020 0.219020 150\n", "am 0.219160 0.219160 155\n", "you 0.223648 0.223648 203\n", "further 0.224770 0.224770 117\n", "particular 0.225682 0.225682 141\n", "easy 0.231994 0.231994 130\n", "makes 0.232274 0.232274 140\n", "previous 0.232555 0.232555 153\n", "given 0.237674 0.237674 216\n", "is well 0.239287 0.239287 153\n", "both 0.240830 0.240830 215\n", "way 0.243986 0.243986 197\n", "new 0.244828 0.244828 247\n", "novel 0.247002 0.247002 188\n", "first 0.247984 0.247984 226\n", "well written 0.253945 0.253945 200\n", "/ 0.258363 0.258363 232\n", "written 0.279473 0.279473 290\n", "about 0.319798 0.319798 249\n", "to see 0.328775 0.328775 185\n", "\n", "[31640 rows x 3 columns]" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "axes.sort_values(by='x')" ] }, { "cell_type": "code", "execution_count": 142, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
loridprankdiffsfssfs_p
0__ __ __ _
1$ $___
2dialognoveltytime seriestime series
3medicali doautoencoderautoencoder
4word2veclayersseriesreconstruction
5mutualgraphconnectionsseries
6mutual informationlimitedreconstruction$ $
7_ _claimnoveltyconnections
8miclass$ $classes
9auto encodersis noclassesnovelty
\n", "
" ], "text/plain": [ " loridp rankdiff sfs sfs_p\n", "0 _ _ _ _ _ _ _\n", "1 $ $ _ _ _\n", "2 dialog novelty time series time series\n", "3 medical i do autoencoder autoencoder\n", "4 word2vec layers series reconstruction\n", "5 mutual graph connections series\n", "6 mutual information limited reconstruction $ $\n", "7 _ _ claim novelty connections\n", "8 mi class $ $ classes\n", "9 auto encoders is no classes novelty" ] }, "execution_count": 142, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tdf = corpus.get_term_freq_df()\n", "tdf['sfs'] = ScaledFScorePresets(beta = 1).get_scores(tdf['Reject freq'], tdf['Accept freq'])\n", "tdf['sfs_p'] = ScaledFScorePresets(beta = 1, priors=priors).get_scores(tdf['Reject freq'], tdf['Accept freq'])\n", "tdf['loridp'] = st.LogOddsRatioInformativeDirichletPrior(priors, reviews_df.parse.apply(len).mean(), 'word').get_scores(tdf['Reject freq'], tdf['Accept freq'])\n", "tdf['rankdiff'] = st.RankDifference().get_scores(tdf['Reject freq'], tdf['Accept freq'])\n", "pd.DataFrame(\n", " {s:tdf.sort_values(by=s, ascending=False).iloc[::].index\n", " for s in ['sfs', 'sfs_p', 'loridp', 'rankdiff']\n", " }\n", ").iloc[:10]" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')\n", "reviews_df['parse'] = reviews_df['review'].apply(spacy.load('en', parser=False))\n", "\n", "# Create Corpus based on accept/reject/workshop decision.\n", "# Workshop documents are kept here because they supply the prior below.\n", "full_corpus = st.CorpusFromParsedDocuments(\n", "    reviews_df, category_col='decision', parsed_col='parse'\n", ").build()\n", "\n", "# A two-category corpus to use for plotting, with unigrams which only occur in bigrams removed.\n", "# Terms used in <5 documents are removed as well.\n", "corpus = st.CompactTerms(full_corpus.remove_categories(['Workshop']),\n", "                         st.OncePerDocFrequencyRanker, 5).compact()\n", "\n", "# Use counts of unigrams and bigrams from the Workshop corpus as the Dirichlet prior\n", "priors = (st.PriorFactory(full_corpus, term_ranker=st.OncePerDocFrequencyRanker)\n", "          .use_categories(['Workshop'])\n", "          .align_to_target(corpus)\n", "          .get_priors())\n", "term_scorer = st.LogOddsRatioInformativeDirichletPrior(\n", "    priors, reviews_df.parse.apply(len).mean(), 'word') # use the original approach to scaling prior\n", "\n", "html = st.produce_frequency_explorer(corpus, \n", " category='Accept', not_categories=['Reject'],\n", " term_ranker = st.OncePerDocFrequencyRanker,\n", " term_scorer = term_scorer,\n", " grey_threshold = 1.96,\n", " metadata = corpus.get_df()['metadata'])" ] },
{ "cell_type": "code", "execution_count": 56, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "6131293" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "file_name = 'accept_reject_loridp.html'\n", "open(file_name, 'wb').write(html.encode('utf-8'))\n", "#IFrame(src=file_name, width = 1500, height=700)" ] },
{ "cell_type": "code", "execution_count": 146, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8424305" ] }, "execution_count": 146, "metadata": {}, "output_type": "execute_result" } ], "source": [ "html = st.produce_frequency_explorer(compact_corpus, \n", " category='Accept', \n", " not_categories=['Reject'],\n", " term_ranker = st.OncePerDocFrequencyRanker,\n", " term_scorer = st.RankDifference(),\n", " grey_threshold = 0, \n", " metadata = (corpus.get_df()['title'] \n", " + '<br>Score: ' + corpus.get_df()['rating'].apply(lambda x: x.split(':')[0]) + '/10'\n", " + '<br>Confidence: ' + corpus.get_df()['confidence'].apply(lambda x: x.split(':')[0]) + '/5'))\n", "file_name = 'accept_reject_rankdiff.html'\n", "open(file_name, 'wb').write(html.encode('utf-8'))" ] },
{ "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "four_square_corpus_phrases = (st.CorpusFromParsedDocuments(reviews_df, category_col = 'category', parsed_col = 'parse',\n", " feats_from_spacy_doc=st.PhraseMachinePhrases())\n", " .build().compact(st.ClassPercentageCompactor(term_count=1)))" ] },
{ "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7409359" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "four_square_axes = st.FourSquareAxes(four_square_corpus_phrases, \n", " left_categories=['Accept, Positive'], \n", " right_categories=['Accept, Negative'], \n", " top_categories=['Reject, Positive'], \n", " bottom_categories=['Reject, Negative'], \n", " labels = {'a': 'Positive',\n", " 'b': 'Review that was Contrary to Accpetance Decision',\n", " 'not_a': 'Negative',\n", " 'not_b': 'Review that in Line With Acceptance Decision'},\n", " term_ranker=st.OncePerDocFrequencyRanker)\n", "html = st.produce_four_square_axes_explorer(\n", " four_square_axes=four_square_axes,\n", " x_label=\"Accepts: Pos-Neg\",\n", " y_label='Rejects: Neg-Pos',\n", " use_full_doc=True,\n", " pmi_threshold_coefficient=0,\n", " censor_points=False,\n", " metadata=four_square_corpus_phrases.get_df()['metadata'],\n", " color_func='(function(d) {return d3.rgb(230, 220, 230)})',\n", ")\n", "file_name = '../jasonkessler.github.io/iclr2018reviews/accept_reject_four_square_axes_phrases.html'\n", "open(file_name, 'wb').write(html.encode('utf-8'))\n", "#IFrame(src=file_name, width = 1500, height=700)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "$$ \\log_2 \\frac{P(\\mbox{word1\" \"word2})}{P(\\mbox{word1}) \\times P(\\mbox{word2})} > 2 * 
\\mbox{pmi_threshold_coefficient}$$" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [Root]", "language": "python", "name": "Python [Root]" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }