{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import pickle\n",
    "\n",
    "from IPython.display import display, Markdown as md\n",
    "\n",
    "%store -r the_page\n",
    "%store -r the_editor\n",
    "%store -r editor_inputname\n",
    "%store -r calculator\n",
    "%store -r editors_conflicts\n",
    "\n",
    "# Names of the variables restored above with `%store -r`.\n",
    "REQUIRED_NAMES = ('the_page', 'the_editor', 'editor_inputname',\n",
    "                  'calculator', 'editors_conflicts')\n",
    "\n",
    "\n",
    "def load_default(name):\n",
    "    \"\"\"Return the object pickled in data/<name>.p.\n",
    "\n",
    "    The file is opened in a `with` block so the handle is closed after loading.\n",
    "    SECURITY: unpickling can execute arbitrary code, so these files must be\n",
    "    trusted project data, never files obtained from an untrusted source.\n",
    "    \"\"\"\n",
    "    with open(f'data/{name}.p', 'rb') as handle:\n",
    "        return pickle.load(handle)\n",
    "\n",
    "\n",
    "# When any of the variables could not be restored, all five are replaced by\n",
    "# the default data.\n",
    "if any(name not in globals() for name in REQUIRED_NAMES):\n",
    "    print('Loading default data...')\n",
    "    the_page = load_default('the_page')\n",
    "    the_editor = load_default('the_editor')\n",
    "    editor_inputname = load_default('editor_inputname')\n",
    "    calculator = load_default('calculator')\n",
    "    editors_conflicts = load_default('editors_conflicts')\n",
    "\n",
    "page_title = the_page['title']\n",
    "\n",
    "display(md('---'))\n",
    "display(md('# A. Select an editor to analyze their conflicting editors'))\n",
    "display(md(f'***Page: {page_title}***'))\n",
    "display(md('The table below presents the conflict score and other related metrics per editor '\n",
    "           f'(*editor_id* and *editor* column). Select one editor of the page \"{page_title}\" to analyze '\n",
    "           'the overall activity of the editor in the entire Wikipedia:'))\n",
    "\n",
    "display(md('''\n",
    "- **conflict_n**: the total number of conflicts\n",
    "- **conflict**: the sum of conflict scores of all actions (without division)\n",
    "- **actions**: the total number of actions performed by the editor\n",
    "- **conflict_score**: the sum of conflict scores of all actions divided by the number of elegible actions\n",
    "- **conflict_ratio**: the count of all conflicts divided by the number of elegible actions\n",
    "'''))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import qgrid\n",
    "from IPython.display import display, clear_output, Markdown as md\n",
    "from ipywidgets import Output, interact\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "from external.wikipedia import WikipediaDV, WikipediaAPI\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from visualization.calculator_listener import ConflictCalculatorListener\n",
    "\n",
    "graph_description = '''\n",
    "In the above graph you can select the *date range* and *granularity* (yearly, monthly, daily)\n",
    "of the timeline (X-axis), and plot any of the following counts in the black and red lines:\n",
    "\n",
    "- **Conflict Score**: the sum of conflict scores of all actions divided by the number of elegible actions\n",
    "- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)\n",
    "- **Conflict Ratio**: the count of all conflicts divided by the number of elegible actions\n",
    "- **Number of Conflicts**: the total number of conflicts\n",
    "- **Total Elegible Actions**: the total number of elegible actions\n",
    "- **Total Conflict Time**: the sum of all the times (*time_diff_secs*) that has been taken by conflict actions\n",
    "- **Total Elegible Time**: the sum of all the times (*time_diff_secs*) that has been taken by elegible actions\n",
    "- **Time per Conflict Action**: average time of conflict actions\n",
    "- **Time per Elegible Action**: average time of elegible actions\n",
    "'''\n",
    "\n",
    "\n",
    "def display_conflict_score(eleg_actions):\n",
    "    \"\"\"Show the page conflict score and an interactive conflict timeline.\n",
    "\n",
    "    Args:\n",
    "        eleg_actions: table of actions handed to ConflictCalculatorListener;\n",
    "            its 'rev_time' column defines the range of the date slider.\n",
    "\n",
    "    Side effects:\n",
    "        Rebinds the global `listener` and displays widgets in the current output.\n",
    "    \"\"\"\n",
    "    global listener\n",
    "\n",
    "    listener = ConflictCalculatorListener(eleg_actions)\n",
    "\n",
    "    # These labels are passed to the listener as dropdown values; keep the spelling.\n",
    "    metrics = ['Conflict Score', 'Absolute Conflict Score',\n",
    "               'Conflict Ratio', 'Number of Conflicts',\n",
    "               'Total Elegible Actions',\n",
    "               'Total Conflict Time', 'Total Elegible Time',\n",
    "               'Time per Conflict Action', 'Time per Elegible Action']\n",
    "\n",
    "    display(md(f'*Total Page conflict score: {calculator.get_page_conflict_score()}*'))\n",
    "\n",
    "    # Visualization\n",
    "    interact(listener.listen,\n",
    "             _range=get_date_slider_from_datetime(eleg_actions['rev_time']),\n",
    "             granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily'),\n",
    "             black=Dropdown(options=metrics, value='Conflict Score'),\n",
    "             red=Dropdown(options=['None'] + metrics, value='None'))\n",
    "\n",
    "\n",
    "def select_editor(editor):\n",
    "    \"\"\"Look up the selected editor, store the selection and show its conflict graph.\n",
    "\n",
    "    Args:\n",
    "        editor: editor identifier, either the stored `editor_inputname` or the\n",
    "            index label of the row selected in the `editors_conflicts` grid.\n",
    "\n",
    "    Side effects:\n",
    "        Rebinds the globals `editor_inputname`, `the_editor` and (when the editor\n",
    "        is found) `editor_df`, persists the first two with `%store`, and replaces\n",
    "        the content of the `out` widget.\n",
    "    \"\"\"\n",
    "    global editor_df\n",
    "    global the_editor\n",
    "    global editor_inputname\n",
    "\n",
    "    editor_inputname = editor\n",
    "\n",
    "    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))\n",
    "\n",
    "    # Parse the identifier separately from the API call: a failure of the API\n",
    "    # itself must not be mistaken for a non-numeric identifier and retried\n",
    "    # with a truncated name.\n",
    "    try:\n",
    "        editor_key = int(editor_inputname)\n",
    "    except ValueError:\n",
    "        # Non-numeric identifier: drop its first two characters.\n",
    "        # NOTE(review): presumably a two-character prefix marking unregistered\n",
    "        # editors - confirm against the data in `editors_conflicts`.\n",
    "        editor_key = editor_inputname[2:]\n",
    "    the_editor = wikipedia_dv.get_editor(editor_key)\n",
    "\n",
    "    with out:\n",
    "        %store the_editor\n",
    "        %store editor_inputname\n",
    "\n",
    "        clear_output()\n",
    "        display(md('### Current Selection:'))\n",
    "\n",
    "        # An 'invalid' entry in the result is treated as: editor not found.\n",
    "        if 'invalid' in the_editor:\n",
    "            display(f'The editor {editor_inputname} was not found, try a different editor')\n",
    "        else:\n",
    "            # display the data that will be passed to the next notebook\n",
    "            display(the_editor.to_frame('values'))\n",
    "            display(md(f\"#### Evolution of the Conflict Score of *{the_editor['name']}*\"))\n",
    "\n",
    "            is_selected_editor = calculator.elegible_actions['editor'] == str(editor_inputname)\n",
    "            editor_df = calculator.elegible_actions[is_selected_editor].copy()\n",
    "\n",
    "            display_conflict_score(editor_df)\n",
    "\n",
    "\n",
    "def on_selection_change(change):\n",
    "    \"\"\"Grid observer: analyze the editor of the first selected row.\n",
    "\n",
    "    Args:\n",
    "        change: change notification sent by the grid widget (not used).\n",
    "    \"\"\"\n",
    "    try:\n",
    "        select_editor(qg_obj.get_selected_df().iloc[0].name)\n",
    "    except Exception as error:\n",
    "        # Keep the callback alive, but report what actually failed.\n",
    "        print('Problem parsing the name. Execute the cell again and try a different editor.')\n",
    "        print(f'Details: {type(error).__name__}: {error}')\n",
    "\n",
    "\n",
    "qgrid.set_grid_option('maxVisibleRows', 5)\n",
    "qg_obj = qgrid.show_grid(editors_conflicts)\n",
    "qg_obj.observe(on_selection_change, names=['_selected_rows'])\n",
    "\n",
    "display(md('### Select one editor (row) to continue the demo:'))\n",
    "display(md('**Recommendation:** select an editor with *many conflicts* and *mid-high conflict score*'))\n",
    "display(qg_obj)\n",
    "# `out` must exist before select_editor runs, because the function writes into it.\n",
    "out = Output()\n",
    "display(out)\n",
    "display(md(graph_description))\n",
    "select_editor(editor_inputname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "\n",
    "display(md('---'))\n",
    "display(md('# B. Actions per page'))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "display(md('''The following table shows the total number of actions (insertions + deletions + reinsertions) per month\n",
    "(`year_month` column), and page (`page_id` column).'''))\n",
    "display(md('''**Columns description:**\n",
    "- **total**: total number of actions (insertions, deletions, and reinsertions)\n",
    "- **total_surv_48h**: total number of actions that survived at least 48 hours\n",
    "- **total_persistent**: total number of actions that survived until, at least, the end of the month\n",
    "- **total_stopword_count**: total number of actions that were performed in stop words'''))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import qgrid\n",
    "from wikiwho_wrapper import WikiWho\n",
    "\n",
    "wikiwho = WikiWho(lng='en')\n",
    "agg_actions = wikiwho.dv.edit_persistence(editor_id=the_editor.userid)\n",
    "\n",
    "# convert to datetime\n",
    "agg_actions['year_month'] = pd.to_datetime(agg_actions['year_month'])\n",
    "\n",
    "# define total columns\n",
    "total_columns = ['total', 'total_surv_48h', 'total_persistent', 'total_stopword_count']\n",
    "\n",
    "# Each total is the element-wise sum of the adds, dels and reins column groups.\n",
    "# The three label slices are added positionally, so each one has to select the\n",
    "# same number of columns, in the order given by `total_columns`.\n",
    "totals = (agg_actions.loc[:, 'adds':'adds_stopword_count'].values\n",
    "          + agg_actions.loc[:, 'dels':'dels_stopword_count'].values\n",
    "          + agg_actions.loc[:, 'reins':'reins_stopword_count'].values)\n",
    "\n",
    "# add columns with the total actions\n",
    "agg_actions = agg_actions.join(\n",
    "    pd.DataFrame(totals, index=agg_actions.index, columns=total_columns))\n",
    "\n",
    "qgrid.set_grid_option('maxVisibleRows', 5)\n",
    "qgrid.show_grid(agg_actions[['year_month', 'page_id'] + total_columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "from ipywidgets import interact, fixed\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from visualization.actions_listener import ActionsListener\n",
    "\n",
    "display(md('## C. Visualization of editor actions per month'))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "display(md('''In the following graph you can select the *date range* and *granularity* (yearly, monthly)\n",
    "of the timeline (X-axis), and plot any of the following counts in the black, red, blue and green lines:\n",
    "\n",
    "- **adds**: number of first-time insertions\n",
    "- **adds_surv_48h**: number of insertions for the first time that survived at least 48 hours\n",
    "- **adds_persistent**: number of insertions for the first time that survived until, at least, the end of the month\n",
    "- **adds_stopword_count**: number of insertions that were stop words\n",
    "- **dels**: number of deletions\n",
    "- **dels_surv_48h**: number of deletions that were not reinserted in the next 48 hours\n",
    "- **dels_persistent**: number of deletions that were not reinserted until, at least, the end of the month\n",
    "- **dels_stopword_count**: number of deletions that were stop words\n",
    "- **reins**: number of reinsertions\n",
    "- **reins_surv_48h**: number of reinsertions that survived at least 48 hours\n",
    "- **reins_persistent**: number of reinsertions that survived until the end of the month\n",
    "- **reins_stopword_count**: number of reinsertions that were stop words\n",
    "'''))\n",
    "\n",
    "# Listener\n",
    "listener = ActionsListener(agg_actions)\n",
    "\n",
    "# Dropdown options: the total columns first, then every adds/dels/reins column.\n",
    "total_labels = agg_actions.loc[:, 'total':'total_stopword_count'].columns\n",
    "detail_labels = agg_actions.loc[:, 'adds':'reins_stopword_count'].columns\n",
    "actions = total_labels.append(detail_labels).values.tolist()\n",
    "\n",
    "# Visualization (the trailing semicolon hides the repr of the returned function)\n",
    "interact(listener.listen,\n",
    "         _range=get_date_slider_from_datetime(agg_actions['year_month']),\n",
    "         editor=fixed('All'),\n",
    "         granularity=Dropdown(options=['Yearly', 'Monthly'], value='Monthly'),\n",
    "         black=Dropdown(options=actions, value='total'),\n",
    "         red=Dropdown(options=['None'] + actions, value='total_surv_48h'),\n",
    "         green=Dropdown(options=['None'] + actions, value='None'),\n",
    "         blue=Dropdown(options=['None'] + actions, value='None'));"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "\n",
    "display(md('---'))\n",
    "display(md('## D. Editor Conflict'))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "display(md('''The term **conflict** is explained in Notebook 2.\n",
    "The total conflict of an editor is the sum of all the conflict scores of all actions with\n",
    "conflict (or conflict actions). This can be normalized if the sum is divided by the number of\n",
    "actions that can potentially enter into conflict (elegible actions, i.e. actions that have occurred at\n",
    "least twice).\n",
    "\n",
    "In the following graph you can select the *date range* and *granularity* (yearly, monthly)\n",
    "of the timeline (X-axis), and plot any of the following counts in the black and red lines:\n",
    "\n",
    "- **Total**: total number of actions (insertions, and deletions)\n",
    "- **Total_surv_48h**: total number of actions that survived at least 48 hours\n",
    "- **Total_persistent**: total number of actions that survived until, at least, the end of the month\n",
    "- **Total_stopword_count**: total number of actions that were performed in stop words\n",
    "- **Total Elegible Actions**: the total number of elegible actions\n",
    "- **Number of Conflicts**: the total number of conflicts\n",
    "- **Number of Revisions**: the total number of revisions\n",
    "- **Conflict Score**: the sum of conflict scores of all actions divided by the number of elegible actions\n",
    "- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)\n",
    "- **Conflict Ratio**: the count of all conflicts divided by the number of elegible actions\n",
    "'''))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "from ipywidgets import interact\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from visualization.conflicts_listener import ConflictsListener\n",
    "\n",
    "# Visualization\n",
    "listener = ConflictsListener(agg_actions)\n",
    "\n",
    "# These labels are passed to the listener as dropdown values; keep the spelling.\n",
    "metrics = ['Total', 'Total_surv_48h', 'Total_persistent', 'Total_stopword_count',\n",
    "           'Total Elegible Actions', 'Number of Conflicts', 'Number of Revisions',\n",
    "           'Conflict Score', 'Absolute Conflict Score', 'Conflict Ratio']\n",
    "\n",
    "# NOTE(review): no cell of this notebook creates the `conflict` and `elegibles`\n",
    "# columns; confirm that `agg_actions` provides them at this point.\n",
    "total_elegibles = agg_actions.elegibles.sum()\n",
    "\n",
    "# Without elegible actions the division would be 0/0; report a score of 0\n",
    "# instead, which also skips the (empty) graph below.\n",
    "if total_elegibles:\n",
    "    conflict_score = agg_actions.conflict.sum() / total_elegibles\n",
    "else:\n",
    "    conflict_score = 0\n",
    "display(md(f'**Editor conflict score: {conflict_score}**'))\n",
    "\n",
    "# The graph is only shown when there is some conflict to plot.\n",
    "if conflict_score != 0:\n",
    "    interact(listener.listen,\n",
    "             _range=get_date_slider_from_datetime(agg_actions['year_month']),\n",
    "             granularity=Dropdown(options=['Yearly', 'Monthly'], value='Monthly'),\n",
    "             black=Dropdown(options=metrics, value='Conflict Score'),\n",
    "             red=Dropdown(options=['None'] + metrics, value='None'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import HTML, display\n",
    "from utils.notebooks import get_next_notebook\n",
    "\n",
    "# NOTE(review): `get_next_notebook` is imported but never used and the HTML\n",
    "# below contains no link markup; confirm whether a link to the next notebook\n",
    "# was intended here.\n",
    "display(HTML('Go to next workbook'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}