{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Event Data: Infobox and Section"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# basic defaults, including study dates, common SQL exclusions and parquet files for anonymized data\n",
    "%run -i 'data-defaults.py'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## InfoBox and Main section events for WP:M pages with external links"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>in_infobox</th>\n",
       "      <th>action</th>\n",
       "      <th>total_event_count</th>\n",
       "      <th>daily_average_event_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>extClick</td>\n",
       "      <td>684612</td>\n",
       "      <td>21394.12500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>fnClick</td>\n",
       "      <td>817320</td>\n",
       "      <td>25541.25000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>fnHover</td>\n",
       "      <td>1533302</td>\n",
       "      <td>47915.68750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>upClick</td>\n",
       "      <td>49263</td>\n",
       "      <td>1539.46875</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>True</td>\n",
       "      <td>extClick</td>\n",
       "      <td>141369</td>\n",
       "      <td>4417.78125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>True</td>\n",
       "      <td>fnClick</td>\n",
       "      <td>70342</td>\n",
       "      <td>2198.18750</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>True</td>\n",
       "      <td>fnHover</td>\n",
       "      <td>26663</td>\n",
       "      <td>833.21875</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  in_infobox    action  total_event_count  daily_average_event_count\n",
       "0      False  extClick             684612                21394.12500\n",
       "1      False   fnClick             817320                25541.25000\n",
       "2      False   fnHover            1533302                47915.68750\n",
       "3      False   upClick              49263                 1539.46875\n",
       "4       True  extClick             141369                 4417.78125\n",
       "5       True   fnClick              70342                 2198.18750\n",
       "6       True   fnHover              26663                  833.21875"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# WP:M pages with external links: total and per-day event counts for every\n",
    "# (in_infobox, action) pair. Rows with in_infobox = True are InfoBox events;\n",
    "# rows with in_infobox = False are the Main-section events.\n",
    "pm_infobox_events_query = \"\"\"\n",
    "SELECT in_infobox, action, count(*) AS total_event_count, count(*)/{} AS daily_average_event_count\n",
    "FROM \n",
    "    citationusage \n",
    "WHERE wiki = 'enwiki'\n",
    "    AND page_id IN (SELECT page_id FROM ryanmax.population_wpm_pages_with_extlinks)\n",
    "    {}\n",
    "    AND to_date(event_time) >= '{}'\n",
    "    AND to_date(event_time) <= '{}'\n",
    "    AND useragent_is_bot = FALSE\n",
    "GROUP BY in_infobox, action\n",
    "ORDER BY in_infobox, action\n",
    "\"\"\"\n",
    "\n",
    "# The placeholders are positional, in this order:\n",
    "#   divisor for the daily average, extra exclusion clauses, first study day, last study day\n",
    "pm_infobox_events = spark.sql(pm_infobox_events_query.format(\n",
    "    days_in_study,\n",
    "    event_exclusion_sql,\n",
    "    start_date_string,\n",
    "    end_date_string,\n",
    "))\n",
    "\n",
    "# small aggregated result, so collecting it to pandas for rich display is fine\n",
    "pm_infobox_events.toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## InfoBox and Main section events for W pages with external links"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>in_infobox</th>\n",
       "      <th>action</th>\n",
       "      <th>total_event_count</th>\n",
       "      <th>daily_average_event_count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>False</td>\n",
       "      <td>extClick</td>\n",
       "      <td>35176829</td>\n",
       "      <td>1.099276e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>False</td>\n",
       "      <td>fnClick</td>\n",
       "      <td>21324424</td>\n",
       "      <td>6.663882e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>False</td>\n",
       "      <td>fnHover</td>\n",
       "      <td>34837380</td>\n",
       "      <td>1.088668e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>False</td>\n",
       "      <td>upClick</td>\n",
       "      <td>1111282</td>\n",
       "      <td>3.472756e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>True</td>\n",
       "      <td>extClick</td>\n",
       "      <td>14651060</td>\n",
       "      <td>4.578456e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>True</td>\n",
       "      <td>fnClick</td>\n",
       "      <td>1783788</td>\n",
       "      <td>5.574338e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>True</td>\n",
       "      <td>fnHover</td>\n",
       "      <td>1089149</td>\n",
       "      <td>3.403591e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>True</td>\n",
       "      <td>upClick</td>\n",
       "      <td>340</td>\n",
       "      <td>1.062500e+01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  in_infobox    action  total_event_count  daily_average_event_count\n",
       "0      False  extClick           35176829               1.099276e+06\n",
       "1      False   fnClick           21324424               6.663882e+05\n",
       "2      False   fnHover           34837380               1.088668e+06\n",
       "3      False   upClick            1111282               3.472756e+04\n",
       "4       True  extClick           14651060               4.578456e+05\n",
       "5       True   fnClick            1783788               5.574338e+04\n",
       "6       True   fnHover            1089149               3.403591e+04\n",
       "7       True   upClick                340               1.062500e+01"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# W pages with external links: total and per-day event counts for every\n",
    "# (in_infobox, action) pair. Rows with in_infobox = True are InfoBox events;\n",
    "# rows with in_infobox = False are the Main-section events.\n",
    "w_infobox_events_query = \"\"\"\n",
    "SELECT in_infobox, action, count(*) AS total_event_count, count(*)/{} AS daily_average_event_count\n",
    "FROM \n",
    "    citationusage \n",
    "WHERE wiki = 'enwiki'\n",
    "    AND page_id IN (SELECT page_id FROM ryanmax.population_w_pages_with_extlinks)\n",
    "    {}\n",
    "    AND to_date(event_time) >= '{}'\n",
    "    AND to_date(event_time) <= '{}'\n",
    "    AND useragent_is_bot = FALSE\n",
    "GROUP BY in_infobox, action\n",
    "ORDER BY in_infobox, action\n",
    "\"\"\"\n",
    "\n",
    "# The placeholders are positional, in this order:\n",
    "#   divisor for the daily average, extra exclusion clauses, first study day, last study day\n",
    "w_infobox_events = spark.sql(w_infobox_events_query.format(\n",
    "    days_in_study,\n",
    "    event_exclusion_sql,\n",
    "    start_date_string,\n",
    "    end_date_string,\n",
    "))\n",
    "\n",
    "# small aggregated result, so collecting it to pandas for rich display is fine\n",
    "w_infobox_events.toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Infobox clicks that occurred under a section heading (e.g. External links)\n",
    "- no page limits\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+--------+-----------------+-------------------------+\n",
      "|          section_id|  action|total_event_count|daily_average_event_count|\n",
      "+--------------------+--------+-----------------+-------------------------+\n",
      "|                null|extClick|         14979994|              468124.8125|\n",
      "|                null| fnClick|          1849201|              57787.53125|\n",
      "|                null| fnHover|          1107570|               34611.5625|\n",
      "|           Reception| fnClick|            28389|                887.15625|\n",
      "|           Reception| fnHover|            13881|                433.78125|\n",
      "|      External_links|extClick|             7268|                  227.125|\n",
      "|            Timeline|extClick|             4907|                153.34375|\n",
      "|            Rankings| fnClick|             4270|                 133.4375|\n",
      "|            Rankings| fnHover|             3925|                122.65625|\n",
      "|  In_popular_culture|extClick|             3107|                 97.09375|\n",
      "|       Bonnie_Parker|extClick|             2648|                    82.75|\n",
      "|          Soundtrack|extClick|             2208|                     69.0|\n",
      "|      Elevator_video|extClick|             2046|                  63.9375|\n",
      "|              Career|extClick|             2020|                   63.125|\n",
      "|             History|extClick|             1918|                  59.9375|\n",
      "|       Assassination|extClick|             1774|                  55.4375|\n",
      "| Professional_career|extClick|             1671|                 52.21875|\n",
      "|Gesundheit!_Insti...|extClick|             1492|                   46.625|\n",
      "| Body_camera_footage|extClick|             1425|                 44.53125|\n",
      "|              Events|extClick|             1392|                     43.5|\n",
      "+--------------------+--------+-----------------+-------------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Infobox events grouped by the section heading they were recorded under\n",
    "# (e.g. External_links) and by action, largest groups first.\n",
    "# No page-population limit is applied here.\n",
    "#\n",
    "# The study-date window IS applied: daily_average_event_count divides by\n",
    "# days_in_study, so the rows being counted must come from that same window,\n",
    "# otherwise the average is inflated by any events recorded outside the study.\n",
    "infobox_section_events_query = \"\"\"\n",
    "SELECT section_id, action, count(*) AS total_event_count, count(*)/{} AS daily_average_event_count\n",
    "FROM\n",
    "    citationusage\n",
    "WHERE\n",
    "    wiki = 'enwiki'\n",
    "    AND in_infobox = TRUE\n",
    "    {}\n",
    "    AND to_date(event_time) >= '{}'\n",
    "    AND to_date(event_time) <= '{}'\n",
    "    AND useragent_is_bot = FALSE\n",
    "GROUP BY section_id, action\n",
    "ORDER BY total_event_count desc\n",
    "\"\"\"\n",
    "\n",
    "# placeholders, in order: daily-average divisor, extra exclusion clauses, first study day, last study day\n",
    "infobox_section_events = spark.sql(\n",
    "    infobox_section_events_query.format(\n",
    "        days_in_study, event_exclusion_sql, start_date_string, end_date_string\n",
    "    ))\n",
    "# show() prints only the first 20 rows\n",
    "infobox_section_events.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Total count of events (by all event types) for each top-level (H2) section ID for WP:M pages only\n",
    " - where the event also occurred in an InfoBox"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# InfoBox events on WP:M pages with external links, counted per top-level (H2)\n",
    "# section and action. The LEFT JOIN keeps events that have no matching row in\n",
    "# population_wpm_sections; those come back with a null section_h2.\n",
    "pm_section_events_query = \"\"\"\n",
    "SELECT population_wpm_sections.section_h2, action, count(*) count\n",
    "FROM \n",
    "    citationusage\n",
    "    LEFT JOIN ryanmax.population_wpm_sections \n",
    "        ON \n",
    "        population_wpm_sections.page_id = citationusage.page_id \n",
    "        AND population_wpm_sections.section_id = citationusage.section_id\n",
    "WHERE\n",
    "    wiki = 'enwiki'\n",
    "    AND in_infobox = TRUE\n",
    "    AND citationusage.page_id IN \n",
    "        (SELECT DISTINCT page_id \n",
    "        FROM ryanmax.population_wpm_pages_with_extlinks\n",
    "        )\n",
    "    {}\n",
    "    AND to_date(citationusage.event_time) >= '{}'\n",
    "    AND to_date(citationusage.event_time) <= '{}'\n",
    "    AND useragent_is_bot = FALSE\n",
    "GROUP BY population_wpm_sections.section_h2, action\n",
    "ORDER BY count desc\n",
    "\"\"\"\n",
    "\n",
    "# placeholders, in order: extra exclusion clauses, first study day, last study day\n",
    "pm_section_events = spark.sql(pm_section_events_query.format(\n",
    "    event_exclusion_sql,\n",
    "    start_date_string,\n",
    "    end_date_string,\n",
    "))\n",
    "\n",
    "# NOTE(review): the DataFrame is rebuilt from its own RDD before being collected.\n",
    "# This looks like a workaround for a toPandas() issue - confirm before simplifying.\n",
    "pm_section_events_rdd = pm_section_events.rdd\n",
    "pm_section_events_df = sqlContext.createDataFrame(pm_section_events_rdd)\n",
    "\n",
    "# collected to pandas for the filtering and pivoting done in a later cell\n",
    "pm_section_events_pandas = pm_section_events_df.toPandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Count of Infobox events (by all event types) occurring under each top-level (H2) section ID\n",
    "**Limits: WP:M pages and >= 5 events**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>action</th>\n",
       "      <th>extClick</th>\n",
       "      <th>fnClick</th>\n",
       "      <th>fnHover</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>section_h2</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>-- Infobox event outside of a section --</th>\n",
       "      <td>135256</td>\n",
       "      <td>70104</td>\n",
       "      <td>26495</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ASH_(United_Kingdom)</th>\n",
       "      <td>5</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Academics</th>\n",
       "      <td>None</td>\n",
       "      <td>20</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Battle_with_schools</th>\n",
       "      <td>None</td>\n",
       "      <td>146</td>\n",
       "      <td>48</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bibliography</th>\n",
       "      <td>9</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Books</th>\n",
       "      <td>50</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Career</th>\n",
       "      <td>10</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Cause</th>\n",
       "      <td>12</td>\n",
       "      <td>5</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Causes</th>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>21</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Clinfowiki</th>\n",
       "      <td>16</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Common_families_of_interleukins</th>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>DAN_America</th>\n",
       "      <td>8</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Diagnosis</th>\n",
       "      <td>15</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>External_links</th>\n",
       "      <td>4526</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EyeWiki</th>\n",
       "      <td>13</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Family_and_birth</th>\n",
       "      <td>74</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Ganfyd</th>\n",
       "      <td>12</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Health</th>\n",
       "      <td>7</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HemOnc.org</th>\n",
       "      <td>5</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>History</th>\n",
       "      <td>67</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>History,_society,_and_culture</th>\n",
       "      <td>55</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Institute_for_Functional_Medicine</th>\n",
       "      <td>124</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Legacy</th>\n",
       "      <td>19</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Levels</th>\n",
       "      <td>34</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Me2/Orchestra</th>\n",
       "      <td>8</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mechanism_of_action</th>\n",
       "      <td>21</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Medical_use</th>\n",
       "      <td>7</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mental_Health_Parity_and_Addiction_Equity_Act</th>\n",
       "      <td>5</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mental_changes_and_brain_damage</th>\n",
       "      <td>75</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Neurosurgery_methods</th>\n",
       "      <td>6</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Nurofen</th>\n",
       "      <td>9</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Occurrence</th>\n",
       "      <td>213</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Origin_of_term</th>\n",
       "      <td>65</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Overdose</th>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Overview</th>\n",
       "      <td>51</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Pathophysiology</th>\n",
       "      <td>22</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Political_career</th>\n",
       "      <td>7</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rankings_and_reputation</th>\n",
       "      <td>None</td>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>References</th>\n",
       "      <td>13</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rehabilitation_attempts</th>\n",
       "      <td>6</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Rescue</th>\n",
       "      <td>56</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>School_of_Medicine</th>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>See_also</th>\n",
       "      <td>5</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Selected_publications</th>\n",
       "      <td>76</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Signs_and_symptoms</th>\n",
       "      <td>162</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Society_and_culture</th>\n",
       "      <td>9</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Structure</th>\n",
       "      <td>10</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Toxicity_and_precautions</th>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Tuberculosis_in_early_civilization</th>\n",
       "      <td>7</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Uses</th>\n",
       "      <td>11</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Versions</th>\n",
       "      <td>15</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Virology</th>\n",
       "      <td>None</td>\n",
       "      <td>7</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WikEM</th>\n",
       "      <td>5</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WikiDoc</th>\n",
       "      <td>24</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WikiLectures</th>\n",
       "      <td>7</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ZDoggMD</th>\n",
       "      <td>53</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "action                                        extClick fnClick fnHover\n",
       "section_h2                                                            \n",
       "-- Infobox event outside of a section --        135256   70104   26495\n",
       "ASH_(United_Kingdom)                                 5    None    None\n",
       "Academics                                         None      20      18\n",
       "Battle_with_schools                               None     146      48\n",
       "Bibliography                                         9    None    None\n",
       "Books                                               50    None    None\n",
       "Career                                              10    None    None\n",
       "Cause                                               12       5    None\n",
       "Causes                                            None    None      21\n",
       "Clinfowiki                                          16    None    None\n",
       "Common_families_of_interleukins                     23    None    None\n",
       "DAN_America                                          8    None    None\n",
       "Diagnosis                                           15    None    None\n",
       "External_links                                    4526    None    None\n",
       "EyeWiki                                             13    None    None\n",
       "Family_and_birth                                    74    None    None\n",
       "Ganfyd                                              12    None    None\n",
       "Health                                               7    None    None\n",
       "HemOnc.org                                           5    None    None\n",
       "History                                             67    None    None\n",
       "History,_society,_and_culture                       55    None    None\n",
       "Institute_for_Functional_Medicine                  124    None    None\n",
       "Legacy                                              19    None    None\n",
       "Levels                                              34    None    None\n",
       "Me2/Orchestra                                        8    None    None\n",
       "Mechanism_of_action                                 21       5       8\n",
       "Medical_use                                          7    None    None\n",
       "Mental_Health_Parity_and_Addiction_Equity_Act        5    None    None\n",
       "Mental_changes_and_brain_damage                     75    None    None\n",
       "Neurosurgery_methods                                 6    None    None\n",
       "Nurofen                                              9    None    None\n",
       "Occurrence                                         213    None    None\n",
       "Origin_of_term                                      65    None    None\n",
       "Overdose                                          None    None       9\n",
       "Overview                                            51    None    None\n",
       "Pathophysiology                                     22    None    None\n",
       "Political_career                                     7    None    None\n",
       "Rankings_and_reputation                           None       8       6\n",
       "References                                          13    None    None\n",
       "Rehabilitation_attempts                              6    None    None\n",
       "Rescue                                              56    None    None\n",
       "School_of_Medicine                                None    None       9\n",
       "See_also                                             5    None    None\n",
       "Selected_publications                               76    None    None\n",
       "Signs_and_symptoms                                 162    None    None\n",
       "Society_and_culture                                  9    None    None\n",
       "Structure                                           10    None    None\n",
       "Toxicity_and_precautions                          None    None       7\n",
       "Tuberculosis_in_early_civilization                   7    None    None\n",
       "Uses                                                11    None    None\n",
       "Versions                                            15    None    None\n",
       "Virology                                          None       7      12\n",
       "WikEM                                                5    None    None\n",
       "WikiDoc                                             24    None    None\n",
       "WikiLectures                                         7    None    None\n",
       "ZDoggMD                                             53    None    None"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Smallest event count a (section, action) pair needs in order to appear in the table.\n",
    "MIN_EVENT_COUNT = 5\n",
    "# Label for infobox events whose section_h2 is null (no matching section row).\n",
    "NO_SECTION_LABEL = '-- Infobox event outside of a section --'\n",
    "\n",
    "# work on a copy so this cell can be re-run without altering the collected query result\n",
    "section_pda = pm_section_events_pandas.copy()\n",
    "# Replace null section_h2 with the readable label. The result is assigned back\n",
    "# instead of calling fillna(inplace=True) on the attribute: that form is chained\n",
    "# assignment, which newer pandas versions do not guarantee will modify section_pda.\n",
    "section_pda['section_h2'] = section_pda['section_h2'].fillna(value=NO_SECTION_LABEL)\n",
    "section_pda['count'] = section_pda['count'].astype(int)\n",
    "# keep only rows with at least MIN_EVENT_COUNT events\n",
    "df_filtered = section_pda[section_pda['count'] >= MIN_EVENT_COUNT].copy()\n",
    "# Format counts as whole-number strings before pivoting; presumably this keeps the\n",
    "# pivoted table from showing floats once empty cells are introduced.\n",
    "df_filtered['count'] = df_filtered['count'].map(lambda x: '{0:.0f}'.format(x))\n",
    "# one row per H2 section, one column per action\n",
    "df_filtered.pivot(index='section_h2', columns='action', values='count')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "PySpark large: spark-xml jar and local venv path ",
   "language": "python",
   "name": "spark-ryanmax"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}