{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%store -r the_page\n",
    "\n",
    "# Fall back to the bundled default page when no page was handed over via %store.\n",
    "if 'the_page' not in locals():\n",
    "    import pickle\n",
    "    print(\"Loading default data...\")\n",
    "    # NOTE: pickle can execute arbitrary code -- only load trusted files here.\n",
    "    # The context manager closes the file handle once the page is loaded.\n",
    "    with open(\"data/the_page.p\", 'rb') as page_file:\n",
    "        the_page = pickle.load(page_file)\n",
    "\n",
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md(\"# A. Insertions, Deletions, Reinsertions (Actions)\"))\n",
    "display(md(\" Provided by the [WikiWho API](https://www.wikiwho.net/en/api/v1.0.0-beta/)\"))\n",
    "display(md(\"Please give the background processes time to load (see cog wheel symbol right of 'edit app') before interacting with the controls too often!\"))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: a description of the action types is missing directly after the header (see the TokTrack paper for explanations).\n",
    "# TODO: please add editor names instead of \"editor id\"; for IPs just put \"Unregistered editors\".\n",
    "# FIXME: fails for larger articles like 'Evolution' on GESIS notebooks; the kernel dies when ranges are selected. Solution?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from wikiwho_wrapper import WikiWho\n",
    "import pandas as pd\n",
    "import qgrid\n",
    "# limit grids to 5 visible rows so the larger DataFrames we render don't take up too much space\n",
    "qgrid.set_grid_option('maxVisibleRows', 5)\n",
    "\n",
    "wikiwho = WikiWho(lng='en')\n",
    "# subscript access, consistent with every other use of the_page in this notebook\n",
    "agg_actions = wikiwho.dv.actions(the_page['page_id'])\n",
    "\n",
    "# define total columns\n",
    "total_columns = ['total', 'total_surv_48h', 'total_persistent', 'total_stopword_count']\n",
    "\n",
    "# add columns with the total actions: the adds/dels/reins column slices are summed\n",
    "# position-wise, so each slice must yield len(total_columns) columns in matching order\n",
    "agg_actions = agg_actions.join(pd.DataFrame(\n",
    "    agg_actions.loc[:,'adds':'adds_stopword_count'].values +\n",
    "    agg_actions.loc[:,'dels':'dels_stopword_count'].values +\n",
    "    agg_actions.loc[:,'reins':'reins_stopword_count'].values,\n",
    "    index=agg_actions.index,\n",
    "    columns=total_columns\n",
    "))\n",
    "\n",
    "qgrid.show_grid(agg_actions[['year_month', 'editor_id'] + total_columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert to datetime\n",
    "agg_actions['year_month'] = pd.to_datetime(agg_actions['year_month'])\n",
    "\n",
    "# NOTE: agg_actions is handed to the listener unaggregated (editor_id column included).\n",
    "# A groupby over ['year_month', 'page_id'] that never stored its result was dropped as dead work;\n",
    "# presumably ActionsListener aggregates over editors itself (editor='All') -- TODO confirm.\n",
    "\n",
    "# Listener\n",
    "from visualization.actions_listener import ActionsListener\n",
    "listener = ActionsListener(agg_actions)\n",
    "# selectable series: the total columns first, then every adds/dels/reins column\n",
    "action_types = (agg_actions.loc[:,'total':'total_stopword_count'].columns.append(\n",
    "    agg_actions.loc[:,'adds':'reins_stopword_count'].columns)).values.tolist()\n",
    "\n",
    "# Visualization\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from ipywidgets import interact, fixed\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "interact(listener.listen,\n",
    "         _range = get_date_slider_from_datetime(agg_actions['year_month']),\n",
    "         editor=fixed('All'),\n",
    "         granularity=Dropdown(options=['Yearly', 'Monthly'], value='Yearly'),\n",
    "         black=Dropdown(options=action_types, value='total'),\n",
    "         red=Dropdown(options= ['None'] + action_types, value='total_surv_48h'),\n",
    "         green=Dropdown(options= ['None'] + action_types, value='None'),\n",
    "         blue=Dropdown(options= ['None'] + action_types, value='None'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md(\"# B. Measuring conflict\"))\n",
    "display(md('## B.1 Token Conflict Score'))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create the api\n",
    "from wikiwho_wrapper import WikiWho\n",
    "wikiwho = WikiWho(lng='en')\n",
    "\n",
    "from IPython.display import display, Markdown as md\n",
    "# Get the content and revisions from the wikiwho api\n",
    "display(md(\"Downloading all_content from the WikiWhoApi...\"))\n",
    "all_content = wikiwho.dv.all_content(the_page['page_id'])\n",
    "\n",
    "display(md(\"Downloading revisions from the WikiWhoApi...\"))\n",
    "revisions = wikiwho.dv.rev_ids_of_article(the_page['page_id'])\n",
    "\n",
    "# remove the progress messages once both downloads have finished\n",
    "from IPython.display import clear_output\n",
    "clear_output()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from metrics.conflict import ConflictManager\n",
    "from wikiwho_wrapper import WikiWho\n",
    "from IPython.display import clear_output\n",
    "from IPython.display import HTML\n",
    "from utils.notebooks import get_next_notebook, get_previous_notebook\n",
    "\n",
    "# call the calculator\n",
    "calculator = ConflictManager(all_content, revisions)\n",
    "calculator.calculate()\n",
    "clear_output()\n",
    "\n",
    "# display the tokens, the difference in seconds and its corresponding conflict score\n",
    "# (work on a copy so the calculator's own frame does not gain the helper column)\n",
    "conflicts = calculator.conflicts.copy()\n",
    "conflicts['time_diff_secs'] = conflicts['time_diff'].dt.total_seconds()\n",
    "\n",
    "if len(conflicts) > 0:\n",
    "    display(qgrid.show_grid(conflicts[[\n",
    "        'action', 'token', 'token_id', 'rev_id',\n",
    "        'editor', 'time_diff_secs', 'conflict']].sort_values('conflict', ascending=False)))\n",
    "else:\n",
    "    display(md('**There are no conflicting tokens in this page.**'))\n",
    "    display(HTML('Go back to the previous workbook'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": [
    "# TODO: explain the columns of the table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md('## B.2 Conflicting tokens per page'))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": [
    "# TODO: explain \"Eligible Actions\", \"All actions\" and \"Only Conflicts\" before the selection boxes, and how the score is calculated (at least the basics).\n",
    "# TODO: explain what the colors in the tag cloud mean (why are insertions red and deletions blue, not the other way around?)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# listener\n",
    "from visualization.wordcloud_listener import WCListener\n",
    "\n",
    "# the dict keys double as the labels of the 'source' dropdown below\n",
    "listener = WCListener(sources = {\n",
    "    'All actions': calculator.all_actions,\n",
    "    'Eligible Actions': calculator.elegible_actions,\n",
    "    'Only Conflicts': calculator.conflicts\n",
    "})\n",
    "\n",
    "# visualization\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from ipywidgets import interact, fixed\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "interact(listener.listen,\n",
    "         _range=get_date_slider_from_datetime(calculator.all_actions['rev_time']),\n",
    "         source=Dropdown(options=list(listener.sources.keys()), value='Only Conflicts'),\n",
    "         action=Dropdown(options=['Both', 'Just Insertions', 'Just Deletions'], value='Both'),\n",
    "         editor=fixed('All'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md('## B.3 Conflict Score and related metrics'))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualization\n",
    "from visualization.conflicts_listener import ConflictsListener\n",
    "elegible_actions = calculator.elegible_actions.copy()\n",
    "listener = ConflictsListener(elegible_actions)\n",
    "\n",
    "metrics = ['Conflict Score', 'Conflict Ratio', 'Total Conflicts',\n",
    "           'Total Elegible Actions', 'Total Actions', 'Total Time',\n",
    "           'Time per Elegible Action']\n",
    "\n",
    "# compute the page score once and reuse it for both the message and the guard below\n",
    "page_conflict_score = calculator.get_page_conflict_score()\n",
    "display(md(f'**Page conflict score: {page_conflict_score}**'))\n",
    "\n",
    "# Visualization\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from ipywidgets import interact\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "# only offer the interactive plot when the page has a non-zero conflict score\n",
    "if page_conflict_score != 0:\n",
    "    interact(listener.listen,\n",
    "             _range = get_date_slider_from_datetime(elegible_actions['rev_time']),\n",
    "             granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Monthly'),\n",
    "             black=Dropdown(options=metrics, value='Conflict Score'),\n",
    "             red=Dropdown(options= ['None'] + metrics, value='None'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "inputHidden": false,
    "outputHidden": false
   },
   "outputs": [],
   "source": [
    "# TODO: explain the difference between conflict ratio, conflict score, total conflicts, ..., time per eligible action."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md('## B.4 Conflict Score per Editor'))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "editors_conflicts = calculator.get_conflict_score_per_editor()\n",
    "qg_obj = qgrid.show_grid(editors_conflicts)\n",
    "# show the grid only when at least one editor has a conflict score\n",
    "if len(editors_conflicts) > 0:\n",
    "    display(qg_obj)\n",
    "else:\n",
    "    display(md('**There is no Conflict Score**'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import HTML, clear_output, display\n",
    "from utils.notebooks import get_next_notebook, get_previous_notebook\n",
    "\n",
    "# persist the results for other notebooks, then hide the %store messages\n",
    "%store agg_actions\n",
    "%store calculator\n",
    "clear_output()\n",
    "\n",
    "\n",
    "if len(editors_conflicts) > 0:\n",
    "    display(HTML('Go to next workbook'))\n",
    "else:\n",
    "    display(HTML('Go back to the previous workbook'))"
   ]
  }
 ],
 "metadata": {
  "kernel_info": {
   "name": "python3"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "nteract": {
   "version": "0.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}