{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "    <div class=\"bk-root\">\n",
       "        <a href=\"https://bokeh.pydata.org\" target=\"_blank\" class=\"bk-logo bk-logo-small bk-logo-notebook\"></a>\n",
       "        <span id=\"4d60d675-a052-4534-8f26-36558ae35110\">Loading BokehJS ...</span>\n",
       "    </div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/javascript": [
       "\n",
       "(function(root) {\n",
       "  function now() {\n",
       "    return new Date();\n",
       "  }\n",
       "\n",
       "  var force = true;\n",
       "\n",
       "  if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n",
       "    root._bokeh_onload_callbacks = [];\n",
       "    root._bokeh_is_loading = undefined;\n",
       "  }\n",
       "\n",
       "  var JS_MIME_TYPE = 'application/javascript';\n",
       "  var HTML_MIME_TYPE = 'text/html';\n",
       "  var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n",
       "  var CLASS_NAME = 'output_bokeh rendered_html';\n",
       "\n",
       "  /**\n",
       "   * Render data to the DOM node\n",
       "   */\n",
       "  function render(props, node) {\n",
       "    var script = document.createElement(\"script\");\n",
       "    node.appendChild(script);\n",
       "  }\n",
       "\n",
       "  /**\n",
       "   * Handle when an output is cleared or removed\n",
       "   */\n",
       "  function handleClearOutput(event, handle) {\n",
       "    var cell = handle.cell;\n",
       "\n",
       "    var id = cell.output_area._bokeh_element_id;\n",
       "    var server_id = cell.output_area._bokeh_server_id;\n",
       "    // Clean up Bokeh references\n",
       "    if (id !== undefined) {\n",
       "      Bokeh.index[id].model.document.clear();\n",
       "      delete Bokeh.index[id];\n",
       "    }\n",
       "\n",
       "    if (server_id !== undefined) {\n",
       "      // Clean up Bokeh references\n",
       "      var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n",
       "      cell.notebook.kernel.execute(cmd, {\n",
       "        iopub: {\n",
       "          output: function(msg) {\n",
       "            var element_id = msg.content.text.trim();\n",
       "            Bokeh.index[element_id].model.document.clear();\n",
       "            delete Bokeh.index[element_id];\n",
       "          }\n",
       "        }\n",
       "      });\n",
       "      // Destroy server and session\n",
       "      var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n",
       "      cell.notebook.kernel.execute(cmd);\n",
       "    }\n",
       "  }\n",
       "\n",
       "  /**\n",
       "   * Handle when a new output is added\n",
       "   */\n",
       "  function handleAddOutput(event, handle) {\n",
       "    var output_area = handle.output_area;\n",
       "    var output = handle.output;\n",
       "\n",
       "    // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n",
       "    if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n",
       "      return\n",
       "    }\n",
       "\n",
       "    var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n",
       "\n",
       "    if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n",
       "      toinsert[0].firstChild.textContent = output.data[JS_MIME_TYPE];\n",
       "      // store reference to embed id on output_area\n",
       "      output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n",
       "    }\n",
       "    if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n",
       "      var bk_div = document.createElement(\"div\");\n",
       "      bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n",
       "      var script_attrs = bk_div.children[0].attributes;\n",
       "      for (var i = 0; i < script_attrs.length; i++) {\n",
       "        toinsert[0].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n",
       "      }\n",
       "      // store reference to server id on output_area\n",
       "      output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n",
       "    }\n",
       "  }\n",
       "\n",
       "  function register_renderer(events, OutputArea) {\n",
       "\n",
       "    function append_mime(data, metadata, element) {\n",
       "      // create a DOM node to render to\n",
       "      var toinsert = this.create_output_subarea(\n",
       "        metadata,\n",
       "        CLASS_NAME,\n",
       "        EXEC_MIME_TYPE\n",
       "      );\n",
       "      this.keyboard_manager.register_events(toinsert);\n",
       "      // Render to node\n",
       "      var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n",
       "      render(props, toinsert[0]);\n",
       "      element.append(toinsert);\n",
       "      return toinsert\n",
       "    }\n",
       "\n",
       "    /* Handle when an output is cleared or removed */\n",
       "    events.on('clear_output.CodeCell', handleClearOutput);\n",
       "    events.on('delete.Cell', handleClearOutput);\n",
       "\n",
       "    /* Handle when a new output is added */\n",
       "    events.on('output_added.OutputArea', handleAddOutput);\n",
       "\n",
       "    /**\n",
       "     * Register the mime type and append_mime function with output_area\n",
       "     */\n",
       "    OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n",
       "      /* Is output safe? */\n",
       "      safe: true,\n",
       "      /* Index of renderer in `output_area.display_order` */\n",
       "      index: 0\n",
       "    });\n",
       "  }\n",
       "\n",
       "  // register the mime type if in Jupyter Notebook environment and previously unregistered\n",
       "  if (root.Jupyter !== undefined) {\n",
       "    var events = require('base/js/events');\n",
       "    var OutputArea = require('notebook/js/outputarea').OutputArea;\n",
       "\n",
       "    if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n",
       "      register_renderer(events, OutputArea);\n",
       "    }\n",
       "  }\n",
       "\n",
       "  \n",
       "  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n",
       "    root._bokeh_timeout = Date.now() + 5000;\n",
       "    root._bokeh_failed_load = false;\n",
       "  }\n",
       "\n",
       "  var NB_LOAD_WARNING = {'data': {'text/html':\n",
       "     \"<div style='background-color: #fdd'>\\n\"+\n",
       "     \"<p>\\n\"+\n",
       "     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n",
       "     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n",
       "     \"</p>\\n\"+\n",
       "     \"<ul>\\n\"+\n",
       "     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n",
       "     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n",
       "     \"</ul>\\n\"+\n",
       "     \"<code>\\n\"+\n",
       "     \"from bokeh.resources import INLINE\\n\"+\n",
       "     \"output_notebook(resources=INLINE)\\n\"+\n",
       "     \"</code>\\n\"+\n",
       "     \"</div>\"}};\n",
       "\n",
       "  function display_loaded() {\n",
       "    var el = document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\");\n",
       "    if (el != null) {\n",
       "      el.textContent = \"BokehJS is loading...\";\n",
       "    }\n",
       "    if (root.Bokeh !== undefined) {\n",
       "      if (el != null) {\n",
       "        el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n",
       "      }\n",
       "    } else if (Date.now() < root._bokeh_timeout) {\n",
       "      setTimeout(display_loaded, 100)\n",
       "    }\n",
       "  }\n",
       "\n",
       "\n",
       "  function run_callbacks() {\n",
       "    try {\n",
       "      root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n",
       "    }\n",
       "    finally {\n",
       "      delete root._bokeh_onload_callbacks\n",
       "    }\n",
       "    console.info(\"Bokeh: all callbacks have finished\");\n",
       "  }\n",
       "\n",
       "  function load_libs(js_urls, callback) {\n",
       "    root._bokeh_onload_callbacks.push(callback);\n",
       "    if (root._bokeh_is_loading > 0) {\n",
       "      console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n",
       "      return null;\n",
       "    }\n",
       "    if (js_urls == null || js_urls.length === 0) {\n",
       "      run_callbacks();\n",
       "      return null;\n",
       "    }\n",
       "    console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n",
       "    root._bokeh_is_loading = js_urls.length;\n",
       "    for (var i = 0; i < js_urls.length; i++) {\n",
       "      var url = js_urls[i];\n",
       "      var s = document.createElement('script');\n",
       "      s.src = url;\n",
       "      s.async = false;\n",
       "      s.onreadystatechange = s.onload = function() {\n",
       "        root._bokeh_is_loading--;\n",
       "        if (root._bokeh_is_loading === 0) {\n",
       "          console.log(\"Bokeh: all BokehJS libraries loaded\");\n",
       "          run_callbacks()\n",
       "        }\n",
       "      };\n",
       "      s.onerror = function() {\n",
       "        console.warn(\"failed to load library \" + url);\n",
       "      };\n",
       "      console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n",
       "      document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "    }\n",
       "  };var element = document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\");\n",
       "  if (element == null) {\n",
       "    console.log(\"Bokeh: ERROR: autoload.js configured with elementid '4d60d675-a052-4534-8f26-36558ae35110' but no matching script tag was found. \")\n",
       "    return false;\n",
       "  }\n",
       "\n",
       "  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.13.min.js\"];\n",
       "\n",
       "  var inline_js = [\n",
       "    function(Bokeh) {\n",
       "      Bokeh.set_log_level(\"info\");\n",
       "    },\n",
       "    \n",
       "    function(Bokeh) {\n",
       "      \n",
       "    },\n",
       "    function(Bokeh) {\n",
       "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.css\");\n",
       "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.css\");\n",
       "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.css\");\n",
       "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.css\");\n",
       "      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.css\");\n",
       "      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.css\");\n",
       "    }\n",
       "  ];\n",
       "\n",
       "  function run_inline_js() {\n",
       "    \n",
       "    if ((root.Bokeh !== undefined) || (force === true)) {\n",
       "      for (var i = 0; i < inline_js.length; i++) {\n",
       "        inline_js[i].call(root, root.Bokeh);\n",
       "      }if (force === true) {\n",
       "        display_loaded();\n",
       "      }} else if (Date.now() < root._bokeh_timeout) {\n",
       "      setTimeout(run_inline_js, 100);\n",
       "    } else if (!root._bokeh_failed_load) {\n",
       "      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n",
       "      root._bokeh_failed_load = true;\n",
       "    } else if (force !== true) {\n",
       "      var cell = $(document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\")).parents('.cell').data().cell;\n",
       "      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n",
       "    }\n",
       "\n",
       "  }\n",
       "\n",
       "  if (root._bokeh_is_loading === 0) {\n",
       "    console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n",
       "    run_inline_js();\n",
       "  } else {\n",
       "    load_libs(js_urls, function() {\n",
       "      console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n",
       "      run_inline_js();\n",
       "    });\n",
       "  }\n",
       "}(window));"
      ],
      "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n  function now() {\n    return new Date();\n  }\n\n  var force = true;\n\n  if (typeof (root._bokeh_onload_callbacks) === \"undefined\" || force === true) {\n    root._bokeh_onload_callbacks = [];\n    root._bokeh_is_loading = undefined;\n  }\n\n  \n\n  \n  if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n    root._bokeh_timeout = Date.now() + 5000;\n    root._bokeh_failed_load = false;\n  }\n\n  var NB_LOAD_WARNING = {'data': {'text/html':\n     \"<div style='background-color: #fdd'>\\n\"+\n     \"<p>\\n\"+\n     \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n     \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n     \"</p>\\n\"+\n     \"<ul>\\n\"+\n     \"<li>re-rerun `output_notebook()` to attempt to load from CDN again, or</li>\\n\"+\n     \"<li>use INLINE resources instead, as so:</li>\\n\"+\n     \"</ul>\\n\"+\n     \"<code>\\n\"+\n     \"from bokeh.resources import INLINE\\n\"+\n     \"output_notebook(resources=INLINE)\\n\"+\n     \"</code>\\n\"+\n     \"</div>\"}};\n\n  function display_loaded() {\n    var el = document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\");\n    if (el != null) {\n      el.textContent = \"BokehJS is loading...\";\n    }\n    if (root.Bokeh !== undefined) {\n      if (el != null) {\n        el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n      }\n    } else if (Date.now() < root._bokeh_timeout) {\n      setTimeout(display_loaded, 100)\n    }\n  }\n\n\n  function run_callbacks() {\n    try {\n      root._bokeh_onload_callbacks.forEach(function(callback) { callback() });\n    }\n    finally {\n      delete root._bokeh_onload_callbacks\n    }\n    console.info(\"Bokeh: all callbacks have finished\");\n  }\n\n  function load_libs(js_urls, callback) {\n    root._bokeh_onload_callbacks.push(callback);\n    if (root._bokeh_is_loading 
> 0) {\n      console.log(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n      return null;\n    }\n    if (js_urls == null || js_urls.length === 0) {\n      run_callbacks();\n      return null;\n    }\n    console.log(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n    root._bokeh_is_loading = js_urls.length;\n    for (var i = 0; i < js_urls.length; i++) {\n      var url = js_urls[i];\n      var s = document.createElement('script');\n      s.src = url;\n      s.async = false;\n      s.onreadystatechange = s.onload = function() {\n        root._bokeh_is_loading--;\n        if (root._bokeh_is_loading === 0) {\n          console.log(\"Bokeh: all BokehJS libraries loaded\");\n          run_callbacks()\n        }\n      };\n      s.onerror = function() {\n        console.warn(\"failed to load library \" + url);\n      };\n      console.log(\"Bokeh: injecting script tag for BokehJS library: \", url);\n      document.getElementsByTagName(\"head\")[0].appendChild(s);\n    }\n  };var element = document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\");\n  if (element == null) {\n    console.log(\"Bokeh: ERROR: autoload.js configured with elementid '4d60d675-a052-4534-8f26-36558ae35110' but no matching script tag was found. 
\")\n    return false;\n  }\n\n  var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.js\", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-0.12.13.min.js\"];\n\n  var inline_js = [\n    function(Bokeh) {\n      Bokeh.set_log_level(\"info\");\n    },\n    \n    function(Bokeh) {\n      \n    },\n    function(Bokeh) {\n      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.css\");\n      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-0.12.13.min.css\");\n      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.css\");\n      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.13.min.css\");\n      console.log(\"Bokeh: injecting CSS: https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.css\");\n      Bokeh.embed.inject_css(\"https://cdn.pydata.org/bokeh/release/bokeh-tables-0.12.13.min.css\");\n    }\n  ];\n\n  function run_inline_js() {\n    \n    if ((root.Bokeh !== undefined) || (force === true)) {\n      for (var i = 0; i < inline_js.length; i++) {\n        inline_js[i].call(root, root.Bokeh);\n      }if (force === true) {\n        display_loaded();\n      }} else if (Date.now() < root._bokeh_timeout) {\n      setTimeout(run_inline_js, 100);\n    } else if (!root._bokeh_failed_load) {\n      console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n      root._bokeh_failed_load = true;\n    } else if (force !== true) {\n      var cell = $(document.getElementById(\"4d60d675-a052-4534-8f26-36558ae35110\")).parents('.cell').data().cell;\n      cell.output_area.append_execute_result(NB_LOAD_WARNING)\n    }\n\n  }\n\n  if (root._bokeh_is_loading === 0) {\n    console.log(\"Bokeh: BokehJS loaded, going straight to plotting\");\n    
run_inline_js();\n  } else {\n    load_libs(js_urls, function() {\n      console.log(\"Bokeh: BokehJS plotting callback run at\", now());\n      run_inline_js();\n    });\n  }\n}(window));"
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<style>.container { width:98% !important; }</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import ast\n",
    "import time\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import requests\n",
    "import scattertext as st\n",
    "import spacy\n",
    "from bokeh.io import show, output_notebook\n",
    "from bokeh.models import ColumnDataSource, ranges, LabelSet\n",
    "from bokeh.palettes import PuBu\n",
    "from bokeh.plotting import figure\n",
    "from IPython.core.display import display, HTML\n",
    "from IPython.display import IFrame\n",
    "output_notebook()\n",
    "display(HTML(\"<style>.container { width:98% !important; }</style>\"))\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Guard: stop here unless the installed scattertext is version 0.0.2.20 or newer.\n",
    "installed_version = [int(part) for part in st.__version__.split('.')]\n",
    "assert installed_version >= [0, 0, 2, 20]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the ICLR 2018 blind-submission notes (first page, up to 1000 entries)\n",
    "# from the OpenReview API into a DataFrame with one row per note.\n",
    "url = 'https://openreview.net/notes?invitation=ICLR.cc%2F2018%2FConference%2F-%2FBlind_Submission&offset=0&limit=1000'\n",
    "response = requests.get(url, timeout=60)  # do not hang forever on a stalled connection\n",
    "response.raise_for_status()  # surface HTTP errors here rather than as a KeyError on 'notes'\n",
    "df = pd.DataFrame(response.json()['notes'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'df' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-1-2b18eea99cb5>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mforum_content\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mforum_id\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mforum\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m     \u001b[0mnotes_url\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'https://openreview.net/notes?forum={}&trash=true'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mforum_id\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m         \u001b[0mforum_content\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnotes_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined"
     ]
    }
   ],
   "source": [
    "# Download the notes of every submission's forum, one request per row of df.\n",
    "# forum_content must stay a list with exactly one entry per row so that the\n",
    "# column built from it below lines up with df.\n",
    "forum_content = []\n",
    "for i, forum_id in enumerate(df.forum):\n",
    "    notes_url = 'https://openreview.net/notes?forum={}&trash=true'.format(forum_id)\n",
    "    try:\n",
    "        forum_content.append(requests.get(notes_url, timeout=60).json())\n",
    "    except (requests.exceptions.RequestException, ValueError):\n",
    "        # Request failed or the body was not JSON: report it and keep an empty\n",
    "        # placeholder for this forum. (Rebinding forum_content to a dict here\n",
    "        # would make every later append fail and discard all downloaded data.)\n",
    "        print('err', i, forum_id)\n",
    "        forum_content.append({})\n",
    "    time.sleep(.3)  # pause between requests\n",
    "df['forumContent'] = pd.Series(forum_content, index=df.index)\n",
    "\n",
    "df.to_csv('iclr2018_raw.csv.bz2', index=False, compression='bz2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "#\n",
    "# Code here is to read locally:\n",
    "# The 'forumContent' and 'content' columns come back from the CSV as text and are\n",
    "# parsed into Python objects again. ast.literal_eval only accepts literals, so,\n",
    "# unlike eval, it cannot execute code that might be embedded in the file.\n",
    "read_local = True\n",
    "if read_local:\n",
    "    df = pd.read_csv('iclr2018_raw.csv.bz2')\n",
    "    df['forumContent'] = df.forumContent.apply(ast.literal_eval)\n",
    "    df['content'] = df.content.apply(ast.literal_eval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Reject                      504\n",
       "Accept (Poster)             313\n",
       "Invite to Workshop Track     90\n",
       "Accept (Oral)                23\n",
       "Name: decision_raw, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def _first_decision(forum):\n",
    "    '''Return the decision of the first note in forum['notes'] that has one.\n",
    "\n",
    "    Raises IndexError when no note in the forum carries a decision.\n",
    "    '''\n",
    "    found = [note['content']['decision']\n",
    "             for note in forum['notes']\n",
    "             if 'decision' in note['content']]\n",
    "    return found[0]\n",
    "\n",
    "\n",
    "df['decision_raw'] = df.forumContent.apply(_first_decision)\n",
    "df['decision_raw'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "930"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows in df.\n",
    "df.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _simplify_decision(decision_raw):\n",
    "    '''Collapse a raw decision string into Reject, Accept or Workshop.'''\n",
    "    if decision_raw == 'Reject':\n",
    "        return 'Reject'\n",
    "    if decision_raw.startswith('Accept'):\n",
    "        return 'Accept'\n",
    "    return 'Workshop'\n",
    "\n",
    "\n",
    "def _bin_rating(rating):\n",
    "    '''Bin a rating string by the integer before its first colon.\n",
    "\n",
    "    Scores below 5 are Negative, scores above 6 are Positive, 5 and 6 are Neutral.\n",
    "    '''\n",
    "    score = int(rating.split(':')[0].strip())\n",
    "    if score < 5:\n",
    "        return 'Negative'\n",
    "    if score > 6:\n",
    "        return 'Positive'\n",
    "    return 'Neutral'\n",
    "\n",
    "\n",
    "df['title'] = df.content.apply(lambda x: x['title'])\n",
    "df['authors'] = df.content.apply(lambda x: x['authors'])\n",
    "\n",
    "# One record per review note, in forum order. A single DataFrame is built from the\n",
    "# flat list instead of creating one DataFrame per forum and concatenating them,\n",
    "# which also gives only_reviews_df a unique index.\n",
    "review_records = [\n",
    "    {'review': n['content']['review'],\n",
    "     'rating': n['content']['rating'],\n",
    "     'confidence': n['content']['confidence'],\n",
    "     'forum': n['forum']}\n",
    "    for c in df.forumContent\n",
    "    for n in c['notes']\n",
    "    if 'content' in n and 'review' in n['content']\n",
    "]\n",
    "only_reviews_df = pd.DataFrame(review_records)\n",
    "\n",
    "# Attach each review to its paper's title, authors and decision via the forum id.\n",
    "reviews_df = pd.merge(df[['title', 'authors', 'decision_raw', 'forum']], only_reviews_df, on='forum')\n",
    "reviews_df['decision'] = reviews_df['decision_raw'].apply(_simplify_decision)\n",
    "reviews_df['rating_bin'] = reviews_df['rating'].apply(_bin_rating)\n",
    "reviews_df['category'] = reviews_df['decision'] + ', ' + reviews_df['rating_bin']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<div class=\"bk-root\">\n",
       "    <div class=\"bk-plotdiv\" id=\"a7ac1fc1-d8dc-4943-8713-8eaa635969ee\"></div>\n",
       "</div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/javascript": [
       "(function(root) {\n",
       "  function embed_document(root) {\n",
       "    \n",
       "  var docs_json = {\"04c8c15c-980f-4431-b62b-a6dcc72ef801\":{\"roots\":{\"references\":[{\"attributes\":{\"axis_label\":\"Paper Count\",\"formatter\":{\"id\":\"3c397ccd-8924-451a-a3dc-9af7e9230903\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"86920c3e-d9f3-4110-b1cf-874d7f3c4af3\",\"type\":\"BasicTicker\"}},\"id\":\"586cf370-3e20-48cd-9274-225a237cb4cc\",\"type\":\"LinearAxis\"},{\"attributes\":{\"plot\":{\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"a0d191bb-0042-407e-82e3-6bc2458b8158\",\"type\":\"CategoricalTicker\"}},\"id\":\"cf6b767a-0362-4d8a-adc0-415ab6e34ae8\",\"type\":\"Grid\"},{\"attributes\":{\"source\":{\"id\":\"0ec96012-280e-40e6-b18f-9433ffa3d58e\",\"type\":\"ColumnDataSource\"}},\"id\":\"1afc5c41-ab48-4f9d-aa62-8c66034500c6\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"3c397ccd-8924-451a-a3dc-9af7e9230903\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"a0d191bb-0042-407e-82e3-6bc2458b8158\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"44bbb868-46d5-46bd-bd88-c8774d33fa9b\",\"type\":\"SaveTool\"},{\"attributes\":{\"level\":\"glyph\",\"plot\":{\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"source\":{\"id\":\"0ec96012-280e-40e6-b18f-9433ffa3d58e\",\"type\":\"ColumnDataSource\"},\"text\":{\"field\":\"y\"},\"x\":{\"field\":\"x\"},\"x_offset\":{\"value\":-13.5},\"y\":{\"field\":\"y\"}},\"id\":\"66b3bebd-5172-4f50-9042-499885e6d33b\",\"type\":\"LabelSet\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"top\":{\"field\":\"y\"},\"width\":{\"value\":0.3},\"x\":{\"field\":\"x\"}},\"id\":\"a1223a80-602a-467a-8756-7da9f4fa7dbf\",\"type\":\"VBar\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\
":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"86920c3e-d9f3-4110-b1cf-874d7f3c4af3\",\"type\":\"BasicTicker\"}},\"id\":\"5d732d58-a986-4351-84f4-9405c08c0d15\",\"type\":\"Grid\"},{\"attributes\":{\"data_source\":{\"id\":\"0ec96012-280e-40e6-b18f-9433ffa3d58e\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"4907ffff-249f-455f-8a8c-646f68c597b0\",\"type\":\"VBar\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"a1223a80-602a-467a-8756-7da9f4fa7dbf\",\"type\":\"VBar\"},\"selection_glyph\":null,\"view\":{\"id\":\"1afc5c41-ab48-4f9d-aa62-8c66034500c6\",\"type\":\"CDSView\"}},\"id\":\"8a0bf3e0-ef0a-4326-986c-7dcdf781867f\",\"type\":\"GlyphRenderer\"},{\"attributes\":{},\"id\":\"86920c3e-d9f3-4110-b1cf-874d7f3c4af3\",\"type\":\"BasicTicker\"},{\"attributes\":{},\"id\":\"cf2a9102-8691-4eec-a72c-a978e9f6e1a7\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"plot\":null,\"text\":\"\"},\"id\":\"06197a5f-0441-4cc1-9957-b9ba00feb06b\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null,\"column_names\":[\"y\",\"x\"],\"data\":{\"x\":[\"Reject\",\"Accept (Poster)\",\"Invite to Workshop Track\",\"Accept 
(Oral)\"],\"y\":[503,313,90,23]}},\"id\":\"0ec96012-280e-40e6-b18f-9433ffa3d58e\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"44bbb868-46d5-46bd-bd88-c8774d33fa9b\",\"type\":\"SaveTool\"}]},\"id\":\"37ae1413-538f-4b21-b190-439ba28d7543\",\"type\":\"Toolbar\"},{\"attributes\":{\"below\":[{\"id\":\"02ccde59-350b-4d45-94d0-ca67ca1b40f6\",\"type\":\"CategoricalAxis\"}],\"left\":[{\"id\":\"586cf370-3e20-48cd-9274-225a237cb4cc\",\"type\":\"LinearAxis\"}],\"plot_height\":300,\"renderers\":[{\"id\":\"02ccde59-350b-4d45-94d0-ca67ca1b40f6\",\"type\":\"CategoricalAxis\"},{\"id\":\"cf6b767a-0362-4d8a-adc0-415ab6e34ae8\",\"type\":\"Grid\"},{\"id\":\"586cf370-3e20-48cd-9274-225a237cb4cc\",\"type\":\"LinearAxis\"},{\"id\":\"5d732d58-a986-4351-84f4-9405c08c0d15\",\"type\":\"Grid\"},{\"id\":\"8a0bf3e0-ef0a-4326-986c-7dcdf781867f\",\"type\":\"GlyphRenderer\"},{\"id\":\"66b3bebd-5172-4f50-9042-499885e6d33b\",\"type\":\"LabelSet\"}],\"title\":{\"id\":\"06197a5f-0441-4cc1-9957-b9ba00feb06b\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"37ae1413-538f-4b21-b190-439ba28d7543\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"bb66180b-3575-4b26-bd58-94a327b11ee0\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"cf2a9102-8691-4eec-a72c-a978e9f6e1a7\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"0fb28cfb-111c-41f1-8d18-411fde8c005c\",\"type\":\"Range1d\"},\"y_scale\":{\"id\":\"dbaaec8b-fb9b-4ce6-bfaa-d4a08c80fc8e\",\"type\":\"LinearScale\"}},\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"fill_color\":{\"value\":\"#3690c0\"},\"line_color\":{\"value\":\"#3690c0\"},\"top\":{\"field\":\"y\"},\"width\":{\"value\":0.3},\"x\":{\"field\":\"x\"}},\"id\":\"4907ffff-249f-455f-8a8c-646f68c597b0\",\"type\":\"VBar\"},{\"attributes\":{},\"id\":\"dbaaec8b-fb9b-4ce6-bfaa-d4a08c80fc8e\",\"type\":\"LinearScale\"},{\"attribu
tes\":{\"callback\":null,\"end\":600},\"id\":\"0fb28cfb-111c-41f1-8d18-411fde8c005c\",\"type\":\"Range1d\"},{\"attributes\":{},\"id\":\"0258166d-9faf-4814-8706-9b336777b00a\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"callback\":null,\"factors\":[\"Reject\",\"Accept (Poster)\",\"Invite to Workshop Track\",\"Accept (Oral)\"]},\"id\":\"bb66180b-3575-4b26-bd58-94a327b11ee0\",\"type\":\"FactorRange\"},{\"attributes\":{\"axis_label\":\"Decision\",\"formatter\":{\"id\":\"0258166d-9faf-4814-8706-9b336777b00a\",\"type\":\"CategoricalTickFormatter\"},\"plot\":{\"id\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"a0d191bb-0042-407e-82e3-6bc2458b8158\",\"type\":\"CategoricalTicker\"}},\"id\":\"02ccde59-350b-4d45-94d0-ca67ca1b40f6\",\"type\":\"CategoricalAxis\"}],\"root_ids\":[\"e08dad28-652d-461f-8edd-1e4bd2a3c114\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.13\"}};\n",
       "  var render_items = [{\"docid\":\"04c8c15c-980f-4431-b62b-a6dcc72ef801\",\"elementid\":\"a7ac1fc1-d8dc-4943-8713-8eaa635969ee\",\"modelid\":\"e08dad28-652d-461f-8edd-1e4bd2a3c114\"}];\n",
       "  root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
       "\n",
       "  }\n",
       "  if (root.Bokeh !== undefined) {\n",
       "    embed_document(root);\n",
       "  } else {\n",
       "    var attempts = 0;\n",
       "    var timer = setInterval(function(root) {\n",
       "      if (root.Bokeh !== undefined) {\n",
       "        embed_document(root);\n",
       "        clearInterval(timer);\n",
       "      }\n",
       "      attempts++;\n",
       "      if (attempts > 100) {\n",
       "        console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n",
       "        clearInterval(timer);\n",
       "      }\n",
       "    }, 10, root)\n",
       "  }\n",
       "})(window);"
      ],
      "application/vnd.bokehjs_exec.v0+json": ""
     },
     "metadata": {
      "application/vnd.bokehjs_exec.v0+json": {
       "id": "e08dad28-652d-461f-8edd-1e4bd2a3c114"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "decisions = reviews_df[['forum','decision_raw']].drop_duplicates()['decision_raw'].value_counts()\n",
    "source = ColumnDataSource(dict(x=list(decisions.index),y=decisions.values))\n",
    "\n",
    "#source = ColumnDataSource({'x': decisions.index, 'y': decisions.values}\n",
    "\n",
    "plot = figure(plot_width=600, plot_height=300, tools=\"save\",\n",
    "        x_axis_label = \"Decision\",\n",
    "        y_axis_label = \"Paper Count\",\n",
    "        title=\"\",\n",
    "        x_minor_ticks=2,\n",
    "        x_range = source.data[\"x\"],\n",
    "        y_range= ranges.Range1d(start=0,end=600))\n",
    "\n",
    "\n",
    "labels = LabelSet(x='x', y='y', text='y', level='glyph',\n",
    "        x_offset=-13.5, y_offset=0, source=source, render_mode='canvas')\n",
    "\n",
    "plot.vbar(source=source,x='x',top='y',bottom=0,width=0.3,color=PuBu[7][2])\n",
    "\n",
    "plot.add_layout(labels)\n",
    "show(plot)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/kesslej/anaconda3/lib/python3.5/site-packages/bokeh/core/json_encoder.py:80: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  elif np.issubdtype(type(obj), np.float):\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "<div class=\"bk-root\">\n",
       "    <div class=\"bk-plotdiv\" id=\"5e279a98-1f57-43ac-9e66-3a4931885037\"></div>\n",
       "</div>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/javascript": [
       "(function(root) {\n",
       "  function embed_document(root) {\n",
       "    \n",
       "  var docs_json = {\"3992f4c1-1c27-41f0-95d0-459441571f79\":{\"roots\":{\"references\":[{\"attributes\":{\"source\":{\"id\":\"3d18162c-6726-4ca0-9a04-66b34e6acc93\",\"type\":\"ColumnDataSource\"}},\"id\":\"cbf4d423-e941-4de1-b1e8-7d5c14dbc49f\",\"type\":\"CDSView\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"7ca8c5c4-5132-4a72-9349-5ff57e5478eb\",\"type\":\"SaveTool\"}]},\"id\":\"04c36a4c-7afb-4b07-afd0-d938f933a67f\",\"type\":\"Toolbar\"},{\"attributes\":{\"fill_color\":{\"value\":\"#3690c0\"},\"line_color\":{\"value\":\"#3690c0\"},\"top\":{\"field\":\"y\"},\"width\":{\"value\":0.3},\"x\":{\"field\":\"x\"}},\"id\":\"7e9e282f-9711-49c8-aff6-8a547f42590c\",\"type\":\"VBar\"},{\"attributes\":{\"callback\":null,\"factors\":[\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\",\"9\",\"10\"]},\"id\":\"2d77e4a6-b55e-45ec-9b05-57756cae6a52\",\"type\":\"FactorRange\"},{\"attributes\":{\"data_source\":{\"id\":\"3d18162c-6726-4ca0-9a04-66b34e6acc93\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"7e9e282f-9711-49c8-aff6-8a547f42590c\",\"type\":\"VBar\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"718454da-4d02-44ec-91cf-e45407ab3171\",\"type\":\"VBar\"},\"selection_glyph\":null,\"view\":{\"id\":\"cbf4d423-e941-4de1-b1e8-7d5c14dbc49f\",\"type\":\"CDSView\"}},\"id\":\"b6254c76-91a8-41c0-9c11-7f125b45f4cd\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"axis_label\":\"Rating\",\"formatter\":{\"id\":\"ff7aec16-5f93-40dc-a810-1b0afd4d23cc\",\"type\":\"CategoricalTickFormatter\"},\"plot\":{\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"05a341e8-b9a6-411f-8200-16fcdd1ed9db\",\"type\":\"CategoricalTicker\"}},\"id\":\"cc64e842-04e8-4d3c-9a9e-ddb01066e387\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"4c20e64f-7e69-4552-a33d-a8d4744fe3cb\",\"type\":\"LinearScale\"},{\"attribu
tes\":{},\"id\":\"7ca8c5c4-5132-4a72-9349-5ff57e5478eb\",\"type\":\"SaveTool\"},{\"attributes\":{},\"id\":\"ce818dfa-c79d-4a08-aa91-f035ed3cacee\",\"type\":\"BasicTickFormatter\"},{\"attributes\":{},\"id\":\"ff7aec16-5f93-40dc-a810-1b0afd4d23cc\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"dimension\":1,\"plot\":{\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"6fe4df45-f5b6-4457-a258-7612e69f3e73\",\"type\":\"BasicTicker\"}},\"id\":\"5d0affcf-a6c3-4b09-8644-b9dcc44c91b0\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"6fe4df45-f5b6-4457-a258-7612e69f3e73\",\"type\":\"BasicTicker\"},{\"attributes\":{\"callback\":null,\"column_names\":[\"y\",\"x\"],\"data\":{\"x\":[\"1\",\"2\",\"3\",\"4\",\"5\",\"6\",\"7\",\"8\",\"9\",\"10\"],\"y\":[5,63,234,557,576,626,526,170,47,2]}},\"id\":\"3d18162c-6726-4ca0-9a04-66b34e6acc93\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"plot\":{\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"05a341e8-b9a6-411f-8200-16fcdd1ed9db\",\"type\":\"CategoricalTicker\"}},\"id\":\"cf5ed539-ae32-4ec8-9156-5cb47e9d3dfd\",\"type\":\"Grid\"},{\"attributes\":{\"callback\":null,\"end\":726},\"id\":\"58b3d85b-ba67-4d16-881f-316baa7349dc\",\"type\":\"Range1d\"},{\"attributes\":{},\"id\":\"6498ef77-52c7-4804-8e6e-a271b63c34c9\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"axis_label\":\"Review 
Count\",\"formatter\":{\"id\":\"ce818dfa-c79d-4a08-aa91-f035ed3cacee\",\"type\":\"BasicTickFormatter\"},\"plot\":{\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"ticker\":{\"id\":\"6fe4df45-f5b6-4457-a258-7612e69f3e73\",\"type\":\"BasicTicker\"}},\"id\":\"0d11ad06-89cd-4e26-a278-3ea5cb624918\",\"type\":\"LinearAxis\"},{\"attributes\":{\"below\":[{\"id\":\"cc64e842-04e8-4d3c-9a9e-ddb01066e387\",\"type\":\"CategoricalAxis\"}],\"left\":[{\"id\":\"0d11ad06-89cd-4e26-a278-3ea5cb624918\",\"type\":\"LinearAxis\"}],\"plot_height\":300,\"renderers\":[{\"id\":\"cc64e842-04e8-4d3c-9a9e-ddb01066e387\",\"type\":\"CategoricalAxis\"},{\"id\":\"cf5ed539-ae32-4ec8-9156-5cb47e9d3dfd\",\"type\":\"Grid\"},{\"id\":\"0d11ad06-89cd-4e26-a278-3ea5cb624918\",\"type\":\"LinearAxis\"},{\"id\":\"5d0affcf-a6c3-4b09-8644-b9dcc44c91b0\",\"type\":\"Grid\"},{\"id\":\"b6254c76-91a8-41c0-9c11-7f125b45f4cd\",\"type\":\"GlyphRenderer\"},{\"id\":\"caadb636-d409-48ec-aca2-7fa38fb1db68\",\"type\":\"LabelSet\"}],\"title\":{\"id\":\"49435357-01d4-4cf1-8ead-7fb214572b45\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"04c36a4c-7afb-4b07-afd0-d938f933a67f\",\"type\":\"Toolbar\"},\"x_range\":{\"id\":\"2d77e4a6-b55e-45ec-9b05-57756cae6a52\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"6498ef77-52c7-4804-8e6e-a271b63c34c9\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"58b3d85b-ba67-4d16-881f-316baa7349dc\",\"type\":\"Range1d\"},\"y_scale\":{\"id\":\"4c20e64f-7e69-4552-a33d-a8d4744fe3cb\",\"type\":\"LinearScale\"}},\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"plot\":null,\"text\":\"\"},\"id\":\"49435357-01d4-4cf1-8ead-7fb214572b45\",\"type\":\"Title\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"top\":{\"field\":\"y\"},\"width\":{\"value\":0.3},\"x\":{\"field\":\"x\"}},\"id\":\"718454da-4
d02-44ec-91cf-e45407ab3171\",\"type\":\"VBar\"},{\"attributes\":{},\"id\":\"05a341e8-b9a6-411f-8200-16fcdd1ed9db\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"level\":\"glyph\",\"plot\":{\"id\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\",\"subtype\":\"Figure\",\"type\":\"Plot\"},\"source\":{\"id\":\"3d18162c-6726-4ca0-9a04-66b34e6acc93\",\"type\":\"ColumnDataSource\"},\"text\":{\"field\":\"y\"},\"x\":{\"field\":\"x\"},\"x_offset\":{\"value\":-13.5},\"y\":{\"field\":\"y\"}},\"id\":\"caadb636-d409-48ec-aca2-7fa38fb1db68\",\"type\":\"LabelSet\"}],\"root_ids\":[\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\"]},\"title\":\"Bokeh Application\",\"version\":\"0.12.13\"}};\n",
       "  var render_items = [{\"docid\":\"3992f4c1-1c27-41f0-95d0-459441571f79\",\"elementid\":\"5e279a98-1f57-43ac-9e66-3a4931885037\",\"modelid\":\"409a6481-03e6-4a1b-a9c7-ff130ed0c75c\"}];\n",
       "  root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n",
       "\n",
       "  }\n",
       "  if (root.Bokeh !== undefined) {\n",
       "    embed_document(root);\n",
       "  } else {\n",
       "    var attempts = 0;\n",
       "    var timer = setInterval(function(root) {\n",
       "      if (root.Bokeh !== undefined) {\n",
       "        embed_document(root);\n",
       "        clearInterval(timer);\n",
       "      }\n",
       "      attempts++;\n",
       "      if (attempts > 100) {\n",
       "        console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\")\n",
       "        clearInterval(timer);\n",
       "      }\n",
       "    }, 10, root)\n",
       "  }\n",
       "})(window);"
      ],
      "application/vnd.bokehjs_exec.v0+json": ""
     },
     "metadata": {
      "application/vnd.bokehjs_exec.v0+json": {
       "id": "409a6481-03e6-4a1b-a9c7-ff130ed0c75c"
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "ratings = reviews_df['rating'].value_counts()\n",
    "ratings.index = [int(c.split(':')[0]) for c in ratings.index]\n",
    "ratings = ratings.sort_index()\n",
    "source = ColumnDataSource(dict(x=[str(x) for x in ratings.index],y=ratings.values))\n",
    "\n",
    "plot = figure(plot_width=600, plot_height=300, tools=\"save\",\n",
    "        x_axis_label = \"Rating\",\n",
    "        y_axis_label = \"Review Count\",\n",
    "        title=\"\",\n",
    "        x_minor_ticks=2,\n",
    "        x_range = source.data[\"x\"],\n",
    "        y_range= ranges.Range1d(start=0,end=ratings.max() + 100))\n",
    "\n",
    "\n",
    "labels = LabelSet(x='x', y='y', text='y', level='glyph',\n",
    "        x_offset=-13.5, y_offset=0, source=source, render_mode='canvas')\n",
    "\n",
    "plot.vbar(source=source,x='x',top='y',bottom=0,width=0.3,color=PuBu[7][2])\n",
    "\n",
    "plot.add_layout(labels)\n",
    "show(plot)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "reviews_df['metadata'] = (\n",
    "    reviews_df['title'] + '<br/>Score: ' + reviews_df['rating'].apply(lambda x: x.split(':')[0]) + '/10'\n",
    "    + '<br/>Confidence: ' + reviews_df['confidence'].apply(lambda x: x.split(':')[0]) + '/5'\n",
    "    + '<br/>Ultimate decision: ' + reviews_df['decision'].apply(lambda x: x.split(':')[0]) + '/10'\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "reviews_df.to_csv('iclr2018_reviews.csv.bz2', index=False, compression='bz2')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Start here for NLP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "nlp = spacy.load('en')\n",
    "reviews_df['parse'] = reviews_df['review'].apply(nlp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [],
   "source": [
    "corpus = (st.CorpusFromParsedDocuments(reviews_df, category_col = 'rating_bin', parsed_col = 'parse')\n",
    "          .build()\n",
    "          .remove_categories(['Neutral']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6131293"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "html = st.produce_scattertext_explorer(corpus, \n",
    "                                       category='Positive', \n",
    "                                       not_categories=['Negative'],\n",
    "                                       transform = st.Scalers.percentile_dense,\n",
    "                                       term_scorer = st.RankDifference(),\n",
    "                                       metadata = corpus.get_df()['metadata'])\n",
    "file_name = '../jasonkessler.github.io/iclr2018reviews/pos_neg_dense.html'\n",
    "open(file_name, 'wb').write(html.encode('utf-8'))\n",
    "#IFrame(src=file_name, width = 1500, height=700)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "four_square_corpus = (st.CorpusFromParsedDocuments(reviews_df, category_col = 'category', parsed_col = 'parse')\n",
    "                      .build()\n",
    "                      .get_unigram_corpus()\n",
    "                      .compact(st.ClassPercentageCompactor(term_count=1)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8267592"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_name = '../jasonkessler.github.io/iclr2018reviews/accept_reject_four_square_axes.html'\n",
    "open(file_name, 'wb').write(html.encode('utf-8'))\n",
    "#IFrame(src=file_name, width = 1500, height=700)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8205863"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "four_square_axes = st.FourSquareAxes(four_square_corpus, \n",
    "                                     left_categories=['Accept, Positive'], \n",
    "                                     right_categories=['Accept, Negative'], \n",
    "                                     top_categories=['Reject, Positive'], \n",
    "                                     bottom_categories=['Reject, Negative'], \n",
    "                                     labels = {'a': 'Positive',\n",
    "                                               'b': 'Review that was Contrary to Accpetance Decision',\n",
    "                                               'not_a': 'Negative',\n",
    "                                               'not_b': 'Review that in Line With Acceptance Decision'},\n",
    "                                     term_ranker=st.OncePerDocFrequencyRanker)\n",
    "html = st.produce_four_square_axes_explorer(\n",
    "    four_square_axes=four_square_axes,\n",
    "    x_label=\"Accepts: Pos-Neg\",\n",
    "    y_label='Rejects: Neg-Pos',\n",
    "    use_full_doc=True,\n",
    "    metadata=four_square_corpus.get_df()['metadata'],\n",
    "    color_func='(function(d) {return d3.rgb(230, 220, 230)})',\n",
    "    censor_points = False,\n",
    ")\n",
    "file_name = '../jasonkessler.github.io/iclr2018reviews/accept_reject_four_square_axes_display.html'\n",
    "open(file_name, 'wb').write(html.encode('utf-8'))\n",
    "#IFrame(src=file_name, width = 1500, height=700)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8205862"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "four_square_axes = st.FourSquareAxes(four_square_corpus, \n",
    "                                     left_categories=['Accept, Positive'], \n",
    "                                     right_categories=['Accept, Negative'], \n",
    "                                     top_categories=['Reject, Positive'], \n",
    "                                     bottom_categories=['Reject, Negative'], \n",
    "                                     labels = {'a': 'Positive',\n",
    "                                               'b': 'Review that was Contrary to Accpetance Decision',\n",
    "                                               'not_a': 'Negative',\n",
    "                                               'not_b': 'Review that in Line With Acceptance Decision'},\n",
    "                                     term_ranker=st.OncePerDocFrequencyRanker)\n",
    "html = st.produce_four_square_axes_explorer(\n",
    "    four_square_axes=four_square_axes,\n",
    "    x_label=\"Accepts: Pos-Neg\",\n",
    "    y_label='Rejects: Neg-Pos',\n",
    "    use_full_doc=True,\n",
    "    metadata=four_square_corpus.get_df()['metadata'],\n",
    "    color_func='(function(d) {return d3.rgb(230, 220, 230)})',\n",
    ")\n",
    "file_name = '../jasonkessler.github.io/iclr2018reviews/accept_reject_four_square_axes_interactive.html'\n",
    "open(file_name, 'wb').write(html.encode('utf-8'))\n",
    "#IFrame(src=file_name, width = 1500, height=700)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8206300"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "four_square= st.FourSquare(four_square_corpus, \n",
    "                             category_a_list=['Accept, Positive'], \n",
    "                             category_b_list=['Accept, Negative'], \n",
    "                             not_category_b_list=['Reject, Positive'], \n",
    "                             not_category_a_list=['Reject, Negative'], \n",
    "                             labels = {'a_and_b': 'Accept',\n",
    "                                       'not_a_and_not_b': 'Reject',\n",
    "                                       'a_and_not_b': 'Positive',\n",
    "                                       'b_and_not_a': 'Negative'},\n",
    "                             term_ranker=st.OncePerDocFrequencyRanker)\n",
    "html = st.produce_four_square_explorer(\n",
    "    four_square=four_square,\n",
    "    y_label='Accept-Reject',\n",
    "    x_label='Positive-Negative',\n",
    "    use_full_doc=True,\n",
    "    metadata = four_square_corpus.get_df()['metadata'],\n",
    ")\n",
    "file_name = '../jasonkessler.github.io/iclr2018reviews/accept_reject_four_square.html'\n",
    "open(file_name, 'wb').write(html.encode('utf-8'))\n",
    "#IFrame(src=file_name, width = 1500, height=700)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "29.19629192352295\n"
     ]
    }
   ],
   "source": [
    "#corpus = corpus.remove_infrequent_words(5)\n",
    "t0 = time.time()\n",
    "compact_corpus = st.CompactTerms(corpus, st.OncePerDocFrequencyRanker, 5).compact()\n",
    "print(time.time() - t0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "fine_grain_corpus = (st.CorpusFromParsedDocuments(reviews_df, category_col='category', parsed_col='parse').build())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Reject, Negative',\n",
       " 'Reject, Neutral',\n",
       " 'Accept, Negative',\n",
       " 'Accept, Positive',\n",
       " 'Reject, Positive',\n",
       " 'Workshop, Neutral',\n",
       " 'Accept, Neutral',\n",
       " 'Workshop, Negative',\n",
       " 'Workshop, Positive']"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "fine_grain_corpus.get_categories()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "fine_grain_corpus_compact = st.CompactTerms(fine_grain_corpus, st.OncePerDocFrequencyRanker, 5).compact()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(31640, 307829)"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(fine_grain_corpus_compact.get_terms()), len(fine_grain_corpus.get_terms())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['case for', 'evaluating', 'closer', 'closer to', 'machines',\n",
      "       'applications', 'e.g. the', 'node', 'doing', 'are of'],\n",
      "      dtype='object', name='term')\n",
      "Index(['between', 'way', 'only', 'first', '/', 'method', 'given', 'about',\n",
      "       'to see', 'see'],\n",
      "      dtype='object', name='term')\n"
     ]
    }
   ],
   "source": [
    "tdf = st.OncePerDocFrequencyRanker(fine_grain_corpus).get_ranks()\n",
    "ap_vs_rp = st.RankDifference().get_scores(tdf['Accept, Positive freq'], tdf['Reject, Positive freq'])\n",
    "print(terms.iloc[:10].index)\n",
    "print(terms.iloc[-10:].index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Index(['here the', 'observations', 'authors show', 'valuable', 'find that',\n",
      "       'it ’s', 'from table', 'method which', 'put', 'the process'],\n",
      "      dtype='object', name='term')\n",
      "Index(['model', 'no', 'for the', 'new', 'neural', 'are not', 'dataset',\n",
      "       'these', 'about', 'network'],\n",
      "      dtype='object', name='term')\n"
     ]
    }
   ],
   "source": [
    "an_vs_rn = st.RankDifference().get_scores(tdf['Reject, Positive freq'], tdf['Accept, Positive freq'])\n",
    "print(terms.iloc[:10].index)\n",
    "print(terms.iloc[-10:].index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [],
   "source": [
    "four_square = st.FourSquare(fine_grain_corpus_compact, \n",
    "                            ['Accept, Positive'], \n",
    "                            ['Reject, Positive'],\n",
    "                            ['Accept, Negative'], \n",
    "                            ['Reject, Negative'], \n",
    "              term_ranker=st.OncePerDocFrequencyRanker,\n",
    "              scorer = st.RankDifference())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [],
   "source": [
    "html = st.produce_four_square_explorer(four_square=four_square,\n",
    "                                       x_label='Pos-Neg',\n",
    "                                       y_label='Accept-Reject',\n",
    "                                       num_terms_semiotic_square=10,\n",
    "                                       minimum_term_frequency=10,\n",
    "                                       pmi_threshold_coefficient=10,\n",
    "                                       term_ranker=st.OncePerDocFrequencyRanker,\n",
    "                                       metadata=(fine_grain_corpus_compact._df['category'] + ': '\n",
    "                                                 + fine_grain_corpus_compact._df.rating + ', '\n",
    "                                                 + fine_grain_corpus_compact._df['title']))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "        <iframe\n",
       "            width=\"1500\"\n",
       "            height=\"700\"\n",
       "            src=\"four_square.html\"\n",
       "            frameborder=\"0\"\n",
       "            allowfullscreen\n",
       "        ></iframe>\n",
       "        "
      ],
      "text/plain": [
       "<IPython.lib.display.IFrame at 0x1bcaec630>"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_name = 'four_square.html'\n",
    "open(file_name, 'wb').write(html.encode('utf-8'))\n",
    "#IFrame(src=file_name, width = 1500, height=700)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "axes = four_square.get_axes()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "      <th>counts</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>term</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>not well</th>\n",
       "      <td>-0.060523</td>\n",
       "      <td>-0.060523</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>observations</th>\n",
       "      <td>-0.056245</td>\n",
       "      <td>-0.056245</td>\n",
       "      <td>52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>case for</th>\n",
       "      <td>-0.054141</td>\n",
       "      <td>-0.054141</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it ’s</th>\n",
       "      <td>-0.053300</td>\n",
       "      <td>-0.053300</td>\n",
       "      <td>40</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>doing</th>\n",
       "      <td>-0.046216</td>\n",
       "      <td>-0.046216</td>\n",
       "      <td>53</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>networks as</th>\n",
       "      <td>-0.043832</td>\n",
       "      <td>-0.043832</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>here the</th>\n",
       "      <td>-0.043551</td>\n",
       "      <td>-0.043551</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>be the</th>\n",
       "      <td>-0.042359</td>\n",
       "      <td>-0.042359</td>\n",
       "      <td>62</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>from table</th>\n",
       "      <td>-0.040466</td>\n",
       "      <td>-0.040466</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>natural language</th>\n",
       "      <td>-0.040466</td>\n",
       "      <td>-0.040466</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>observed</th>\n",
       "      <td>-0.039624</td>\n",
       "      <td>-0.039624</td>\n",
       "      <td>39</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>generated data</th>\n",
       "      <td>-0.037240</td>\n",
       "      <td>-0.037240</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unable to</th>\n",
       "      <td>-0.037170</td>\n",
       "      <td>-0.037170</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ensemble</th>\n",
       "      <td>-0.037170</td>\n",
       "      <td>-0.037170</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>unclear whether</th>\n",
       "      <td>-0.037170</td>\n",
       "      <td>-0.037170</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>are pretty</th>\n",
       "      <td>-0.037170</td>\n",
       "      <td>-0.037170</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>for improving</th>\n",
       "      <td>-0.037029</td>\n",
       "      <td>-0.037029</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>vanilla</th>\n",
       "      <td>-0.036959</td>\n",
       "      <td>-0.036959</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>are just</th>\n",
       "      <td>-0.036889</td>\n",
       "      <td>-0.036889</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>evaluating</th>\n",
       "      <td>-0.036889</td>\n",
       "      <td>-0.036889</td>\n",
       "      <td>24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>besides</th>\n",
       "      <td>-0.036819</td>\n",
       "      <td>-0.036819</td>\n",
       "      <td>23</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>benefit of</th>\n",
       "      <td>-0.036538</td>\n",
       "      <td>-0.036538</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>properties of</th>\n",
       "      <td>-0.036398</td>\n",
       "      <td>-0.036398</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>to add</th>\n",
       "      <td>-0.036328</td>\n",
       "      <td>-0.036328</td>\n",
       "      <td>42</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>properties</th>\n",
       "      <td>-0.035697</td>\n",
       "      <td>-0.035697</td>\n",
       "      <td>63</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>are of</th>\n",
       "      <td>-0.033943</td>\n",
       "      <td>-0.033943</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>machines</th>\n",
       "      <td>-0.033803</td>\n",
       "      <td>-0.033803</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>observations and</th>\n",
       "      <td>-0.033803</td>\n",
       "      <td>-0.033803</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>closer to</th>\n",
       "      <td>-0.033803</td>\n",
       "      <td>-0.033803</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>even the</th>\n",
       "      <td>-0.033663</td>\n",
       "      <td>-0.033663</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tasks</th>\n",
       "      <td>0.201908</td>\n",
       "      <td>0.201908</td>\n",
       "      <td>152</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>clearly</th>\n",
       "      <td>0.202819</td>\n",
       "      <td>0.202819</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>'s</th>\n",
       "      <td>0.203100</td>\n",
       "      <td>0.203100</td>\n",
       "      <td>194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>their</th>\n",
       "      <td>0.204573</td>\n",
       "      <td>0.204573</td>\n",
       "      <td>282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>interesting</th>\n",
       "      <td>0.205695</td>\n",
       "      <td>0.205695</td>\n",
       "      <td>371</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>easy to</th>\n",
       "      <td>0.211235</td>\n",
       "      <td>0.211235</td>\n",
       "      <td>118</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bit</th>\n",
       "      <td>0.211866</td>\n",
       "      <td>0.211866</td>\n",
       "      <td>143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.214181</td>\n",
       "      <td>0.214181</td>\n",
       "      <td>236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>network</th>\n",
       "      <td>0.214391</td>\n",
       "      <td>0.214391</td>\n",
       "      <td>265</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>loss</th>\n",
       "      <td>0.215092</td>\n",
       "      <td>0.215092</td>\n",
       "      <td>137</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>i am</th>\n",
       "      <td>0.219020</td>\n",
       "      <td>0.219020</td>\n",
       "      <td>150</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>am</th>\n",
       "      <td>0.219160</td>\n",
       "      <td>0.219160</td>\n",
       "      <td>155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>you</th>\n",
       "      <td>0.223648</td>\n",
       "      <td>0.223648</td>\n",
       "      <td>203</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>further</th>\n",
       "      <td>0.224770</td>\n",
       "      <td>0.224770</td>\n",
       "      <td>117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>particular</th>\n",
       "      <td>0.225682</td>\n",
       "      <td>0.225682</td>\n",
       "      <td>141</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>easy</th>\n",
       "      <td>0.231994</td>\n",
       "      <td>0.231994</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>makes</th>\n",
       "      <td>0.232274</td>\n",
       "      <td>0.232274</td>\n",
       "      <td>140</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>previous</th>\n",
       "      <td>0.232555</td>\n",
       "      <td>0.232555</td>\n",
       "      <td>153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>given</th>\n",
       "      <td>0.237674</td>\n",
       "      <td>0.237674</td>\n",
       "      <td>216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>is well</th>\n",
       "      <td>0.239287</td>\n",
       "      <td>0.239287</td>\n",
       "      <td>153</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>both</th>\n",
       "      <td>0.240830</td>\n",
       "      <td>0.240830</td>\n",
       "      <td>215</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>way</th>\n",
       "      <td>0.243986</td>\n",
       "      <td>0.243986</td>\n",
       "      <td>197</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>new</th>\n",
       "      <td>0.244828</td>\n",
       "      <td>0.244828</td>\n",
       "      <td>247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>novel</th>\n",
       "      <td>0.247002</td>\n",
       "      <td>0.247002</td>\n",
       "      <td>188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>first</th>\n",
       "      <td>0.247984</td>\n",
       "      <td>0.247984</td>\n",
       "      <td>226</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>well written</th>\n",
       "      <td>0.253945</td>\n",
       "      <td>0.253945</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>/</th>\n",
       "      <td>0.258363</td>\n",
       "      <td>0.258363</td>\n",
       "      <td>232</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>written</th>\n",
       "      <td>0.279473</td>\n",
       "      <td>0.279473</td>\n",
       "      <td>290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>about</th>\n",
       "      <td>0.319798</td>\n",
       "      <td>0.319798</td>\n",
       "      <td>249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>to see</th>\n",
       "      <td>0.328775</td>\n",
       "      <td>0.328775</td>\n",
       "      <td>185</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>31640 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                         x         y  counts\n",
       "term                                        \n",
       "not well         -0.060523 -0.060523      23\n",
       "observations     -0.056245 -0.056245      52\n",
       "case for         -0.054141 -0.054141      14\n",
       "it ’s            -0.053300 -0.053300      40\n",
       "doing            -0.046216 -0.046216      53\n",
       "networks as      -0.043832 -0.043832      15\n",
       "here the         -0.043551 -0.043551      19\n",
       "be the           -0.042359 -0.042359      62\n",
       "from table       -0.040466 -0.040466      11\n",
       "natural language -0.040466 -0.040466      13\n",
       "observed         -0.039624 -0.039624      39\n",
       "generated data   -0.037240 -0.037240       7\n",
       "unable to        -0.037170 -0.037170       8\n",
       "ensemble         -0.037170 -0.037170      12\n",
       "unclear whether  -0.037170 -0.037170       8\n",
       "are pretty       -0.037170 -0.037170       8\n",
       "for improving    -0.037029 -0.037029      14\n",
       "vanilla          -0.036959 -0.036959      19\n",
       "are just         -0.036889 -0.036889      18\n",
       "evaluating       -0.036889 -0.036889      24\n",
       "besides          -0.036819 -0.036819      23\n",
       "benefit of       -0.036538 -0.036538      31\n",
       "properties of    -0.036398 -0.036398      37\n",
       "to add           -0.036328 -0.036328      42\n",
       "properties       -0.035697 -0.035697      63\n",
       "are of           -0.033943 -0.033943       6\n",
       "machines         -0.033803 -0.033803      12\n",
       "observations and -0.033803 -0.033803       8\n",
       "closer to        -0.033803 -0.033803      12\n",
       "even the         -0.033663 -0.033663      10\n",
       "...                    ...       ...     ...\n",
       "tasks             0.201908  0.201908     152\n",
       "clearly           0.202819  0.202819     192\n",
       "'s                0.203100  0.203100     194\n",
       "their             0.204573  0.204573     282\n",
       "interesting       0.205695  0.205695     371\n",
       "easy to           0.211235  0.211235     118\n",
       "bit               0.211866  0.211866     143\n",
       "4                 0.214181  0.214181     236\n",
       "network           0.214391  0.214391     265\n",
       "loss              0.215092  0.215092     137\n",
       "i am              0.219020  0.219020     150\n",
       "am                0.219160  0.219160     155\n",
       "you               0.223648  0.223648     203\n",
       "further           0.224770  0.224770     117\n",
       "particular        0.225682  0.225682     141\n",
       "easy              0.231994  0.231994     130\n",
       "makes             0.232274  0.232274     140\n",
       "previous          0.232555  0.232555     153\n",
       "given             0.237674  0.237674     216\n",
       "is well           0.239287  0.239287     153\n",
       "both              0.240830  0.240830     215\n",
       "way               0.243986  0.243986     197\n",
       "new               0.244828  0.244828     247\n",
       "novel             0.247002  0.247002     188\n",
       "first             0.247984  0.247984     226\n",
       "well written      0.253945  0.253945     200\n",
       "/                 0.258363  0.258363     232\n",
       "written           0.279473  0.279473     290\n",
       "about             0.319798  0.319798     249\n",
       "to see            0.328775  0.328775     185\n",
       "\n",
       "[31640 rows x 3 columns]"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Order the term table by its x-axis score, lowest values first.\n",
    "axes.sort_values(by='x', ascending=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>loridp</th>\n",
       "      <th>rankdiff</th>\n",
       "      <th>sfs</th>\n",
       "      <th>sfs_p</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>_</td>\n",
       "      <td>_ _</td>\n",
       "      <td>_ _</td>\n",
       "      <td>_ _</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>$ $</td>\n",
       "      <td>_</td>\n",
       "      <td>_</td>\n",
       "      <td>_</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>dialog</td>\n",
       "      <td>novelty</td>\n",
       "      <td>time series</td>\n",
       "      <td>time series</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>medical</td>\n",
       "      <td>i do</td>\n",
       "      <td>autoencoder</td>\n",
       "      <td>autoencoder</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>word2vec</td>\n",
       "      <td>layers</td>\n",
       "      <td>series</td>\n",
       "      <td>reconstruction</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>mutual</td>\n",
       "      <td>graph</td>\n",
       "      <td>connections</td>\n",
       "      <td>series</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>mutual information</td>\n",
       "      <td>limited</td>\n",
       "      <td>reconstruction</td>\n",
       "      <td>$ $</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>_ _</td>\n",
       "      <td>claim</td>\n",
       "      <td>novelty</td>\n",
       "      <td>connections</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>mi</td>\n",
       "      <td>class</td>\n",
       "      <td>$ $</td>\n",
       "      <td>classes</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>auto encoders</td>\n",
       "      <td>is no</td>\n",
       "      <td>classes</td>\n",
       "      <td>novelty</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               loridp rankdiff             sfs           sfs_p\n",
       "0                   _      _ _             _ _             _ _\n",
       "1                 $ $        _               _               _\n",
       "2              dialog  novelty     time series     time series\n",
       "3             medical     i do     autoencoder     autoencoder\n",
       "4            word2vec   layers          series  reconstruction\n",
       "5              mutual    graph     connections          series\n",
       "6  mutual information  limited  reconstruction             $ $\n",
       "7                 _ _    claim         novelty     connections\n",
       "8                  mi    class             $ $         classes\n",
       "9       auto encoders    is no         classes         novelty"
      ]
     },
     "execution_count": 142,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Score every term with four term-association metrics, then show the ten\n",
    "# top-ranked terms under each metric side by side for comparison.\n",
    "tdf = corpus.get_term_freq_df()\n",
    "reject_counts = tdf['Reject freq']\n",
    "accept_counts = tdf['Accept freq']\n",
    "mean_parse_len = reviews_df.parse.apply(len).mean()  # second argument of the Dirichlet-prior scorer\n",
    "\n",
    "# A list of pairs (not a dict) keeps the column insertion order fixed on every Python version.\n",
    "scorers = [\n",
    "    ('sfs', ScaledFScorePresets(beta=1)),\n",
    "    ('sfs_p', ScaledFScorePresets(beta=1, priors=priors)),\n",
    "    ('loridp', st.LogOddsRatioInformativeDirichletPrior(priors, mean_parse_len, 'word')),\n",
    "    ('rankdiff', st.RankDifference()),\n",
    "]\n",
    "# Every scorer receives the Reject counts first and the Accept counts second.\n",
    "for score_name, scorer in scorers:\n",
    "    tdf[score_name] = scorer.get_scores(reject_counts, accept_counts)\n",
    "\n",
    "# For each metric, the full term index ordered from highest to lowest score.\n",
    "ranked_terms = {\n",
    "    score_name: tdf.sort_values(by=score_name, ascending=False).index\n",
    "    for score_name, _ in scorers\n",
    "}\n",
    "pd.DataFrame(ranked_terms).iloc[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "reviews_df = pd.read_csv('https://github.com/JasonKessler/ICLR18ReviewVis/raw/master/iclr2018_reviews.csv.bz2')\n",
    "reviews_df['parse'] = reviews_df['review'].apply(spacy.load('en', parser=False))\n",
    "\n",
    "# Create Corpus based on accept/reject/workshop decision.\n",
    "# The Workshop documents must stay in this corpus: they supply the prior term counts below.\n",
    "full_corpus = st.CorpusFromParsedDocuments(\n",
    "    reviews_df, category_col='decision', parsed_col='parse'\n",
    ").build()\n",
    "\n",
    "# A two-category corpus to use for plotting, with unigrams which only occur in bigrams removed.\n",
    "# NOTE(review): this comment used to say terms used in <5 documents are removed, while the\n",
    "# code passes minimum_term_count=6 -- confirm which threshold is intended.\n",
    "corpus = st.CompactTerms(\n",
    "    full_corpus.remove_categories(['Workshop']), minimum_term_count=6\n",
    ").compact()\n",
    "\n",
    "# Use counts of unigrams and bigrams from the Workshop corpus as the Dirichlet prior.\n",
    "# Each step is a method call on the PriorFactory, so the chain has to sit outside\n",
    "# the use_categories(...) argument list.\n",
    "priors = (st.PriorFactory(full_corpus, term_ranker=st.OncePerDocFrequencyRanker)\n",
    "          .use_categories(['Workshop'])\n",
    "          .align_to_target(corpus)\n",
    "          .get_priors())\n",
    "term_scorer = st.LogOddsRatioInformativeDirichletPrior(\n",
    "    priors, reviews_df.parse.apply(len).mean(), 'word')  # use the original approach to scaling prior\n",
    "\n",
    "# Accept-vs-Reject frequency explorer scored with the prior-informed log-odds ratio.\n",
    "html = st.produce_frequency_explorer(\n",
    "    corpus,\n",
    "    category='Accept',\n",
    "    not_categories=['Reject'],\n",
    "    term_ranker=st.OncePerDocFrequencyRanker,\n",
    "    term_scorer=term_scorer,\n",
    "    grey_threshold=1.96,\n",
    "    metadata=corpus.get_df()['metadata'])"
   ]
  },
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6131293"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Save the explorer page as UTF-8 bytes. The with-block closes the file handle as\n",
    "# soon as the write finishes instead of leaving that to garbage collection.\n",
    "file_name = 'accept_reject_loridp.html'\n",
    "with open(file_name, 'wb') as html_file:\n",
    "    bytes_written = html_file.write(html.encode('utf-8'))\n",
    "bytes_written  # number of bytes written, displayed as the cell result\n",
    "# Uncomment to preview the saved page inline:\n",
    "#IFrame(src=file_name, width = 1500, height=700)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8424305"
      ]
     },
     "execution_count": 146,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-document label text: the title, then the part of the rating and of the\n",
    "# confidence strings that precedes the first ':'.\n",
    "# NOTE(review): the labels are read from `corpus` while the plot is built from\n",
    "# `compact_corpus` -- presumably both hold the same documents in the same order; confirm.\n",
    "text_before_colon = lambda x: x.split(':')[0]\n",
    "review_df = corpus.get_df()  # public accessor, as used by the other explorer cells\n",
    "review_labels = (review_df['title']\n",
    "                 + '<br/>Score: ' + review_df['rating'].apply(text_before_colon) + '/10'\n",
    "                 + '<br/>Confidence: ' + review_df['confidence'].apply(text_before_colon) + '/5')\n",
    "\n",
    "# Accept-vs-Reject frequency explorer scored by rank difference.\n",
    "html = st.produce_frequency_explorer(\n",
    "    compact_corpus,\n",
    "    category='Accept',\n",
    "    not_categories=['Reject'],\n",
    "    term_ranker=st.OncePerDocFrequencyRanker,\n",
    "    term_scorer=st.RankDifference(),\n",
    "    grey_threshold=0,\n",
    "    metadata=review_labels)\n",
    "\n",
    "# Write the page inside a with-block so the file handle is closed deterministically.\n",
    "file_name = 'accept_reject_rankdiff.html'\n",
    "with open(file_name, 'wb') as html_file:\n",
    "    bytes_written = html_file.write(html.encode('utf-8'))\n",
    "bytes_written  # number of bytes written, displayed as the cell result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a corpus keyed on the 'category' column whose features come from\n",
    "# st.PhraseMachinePhrases, then compact it with ClassPercentageCompactor(term_count=1).\n",
    "phrase_corpus_full = st.CorpusFromParsedDocuments(\n",
    "    reviews_df,\n",
    "    category_col='category',\n",
    "    parsed_col='parse',\n",
    "    feats_from_spacy_doc=st.PhraseMachinePhrases(),\n",
    ").build()\n",
    "phrase_compactor = st.ClassPercentageCompactor(term_count=1)\n",
    "four_square_corpus_phrases = phrase_corpus_full.compact(phrase_compactor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7409359"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Assign each of the four decision/sentiment categories of the phrase corpus to one\n",
    "# side of the four-square layout. The label strings were corrected for spelling\n",
    "# ('Acceptance') and grammar ('was in Line With').\n",
    "four_square_axes = st.FourSquareAxes(\n",
    "    four_square_corpus_phrases,\n",
    "    left_categories=['Accept, Positive'],\n",
    "    right_categories=['Accept, Negative'],\n",
    "    top_categories=['Reject, Positive'],\n",
    "    bottom_categories=['Reject, Negative'],\n",
    "    labels={\n",
    "        'a': 'Positive',\n",
    "        'b': 'Review that was Contrary to Acceptance Decision',\n",
    "        'not_a': 'Negative',\n",
    "        'not_b': 'Review that was in Line With Acceptance Decision',\n",
    "    },\n",
    "    term_ranker=st.OncePerDocFrequencyRanker)\n",
    "\n",
    "# Render the explorer page; every point gets the same fixed colour via color_func.\n",
    "html = st.produce_four_square_axes_explorer(\n",
    "    four_square_axes=four_square_axes,\n",
    "    x_label=\"Accepts: Pos-Neg\",\n",
    "    y_label='Rejects: Neg-Pos',\n",
    "    use_full_doc=True,\n",
    "    pmi_threshold_coefficient=0,\n",
    "    censor_points=False,\n",
    "    metadata=four_square_corpus_phrases.get_df()['metadata'],\n",
    "    color_func='(function(d) {return d3.rgb(230, 220, 230)})',\n",
    ")\n",
    "\n",
    "# Write the page inside a with-block so the file handle is closed deterministically.\n",
    "# NOTE(review): the target is a path into a sibling checkout; it must exist before running.\n",
    "file_name = '../jasonkessler.github.io/iclr2018reviews/accept_reject_four_square_axes_phrases.html'\n",
    "with open(file_name, 'wb') as html_file:\n",
    "    bytes_written = html_file.write(html.encode('utf-8'))\n",
    "bytes_written  # number of bytes written, displayed as the cell result\n",
    "# Uncomment to preview the saved page inline:\n",
    "#IFrame(src=file_name, width = 1500, height=700)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "PMI condition on a bigram, as governed by `pmi_threshold_coefficient`:\n",
    "\n",
    "$$ \\log_2 \\frac{P(\\mbox{word1 word2})}{P(\\mbox{word1}) \\times P(\\mbox{word2})} > 2 \\times \\mbox{pmi_threshold_coefficient}$$"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}