{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import pickle\n",
    "\n",
    "from IPython.display import display, Markdown as md\n",
    "\n",
    "%store -r the_page\n",
    "%store -r the_editor\n",
    "%store -r editor_inputname\n",
    "%store -r calculator\n",
    "%store -r editors_conflicts\n",
    "\n",
    "# Names that the `%store -r` lines above try to restore.\n",
    "REQUIRED_NAMES = ('the_page', 'the_editor', 'editor_inputname',\n",
    "                  'calculator', 'editors_conflicts')\n",
    "\n",
    "\n",
    "def _load_default(name):\n",
    "    \"\"\"Return the object pickled in data/<name>.p, closing the file afterwards.\n",
    "\n",
    "    NOTE(review): unpickling can execute arbitrary code; only point this at\n",
    "    trusted files.\n",
    "    \"\"\"\n",
    "    with open(f\"data/{name}.p\", 'rb') as pickle_file:\n",
    "        return pickle.load(pickle_file)\n",
    "\n",
    "\n",
    "# If any of the variables could not be restored, reload all of them from the\n",
    "# default pickles so that they stay consistent with each other.\n",
    "if any(name not in globals() for name in REQUIRED_NAMES):\n",
    "    print(\"Loading default data...\")\n",
    "    the_page = _load_default('the_page')\n",
    "    the_editor = _load_default('the_editor')\n",
    "    editor_inputname = _load_default('editor_inputname')\n",
    "    calculator = _load_default('calculator')\n",
    "    editors_conflicts = _load_default('editors_conflicts')\n",
    "\n",
    "page_title = the_page['title']\n",
    "\n",
    "display(md(\"---\"))\n",
    "display(md(\"# A. Select an editor to analyze their conflicting editors\"))\n",
    "display(md(f\"***Page: {page_title}***\"))\n",
    "display(md(\n",
    "    \"The table below presents the conflict score and other related metrics per editor \"\n",
    "    f'(*editor_id* and *editor* column). Select one editor of the page \"{page_title}\" to analyze '\n",
    "    \"the overall activity of the editor in the entire Wikipedia:\"))\n",
    "\n",
    "display(md(\"\"\"\n",
    "- **conflict_n**: the total number of conflicts\n",
    "- **conflict**: the sum of conflict scores of all actions (without division)\n",
    "- **actions**: the total number of actions performed by the editor\n",
    "- **conflict_score**: the sum of conflict scores of all actions divided by the number of elegible actions\n",
    "- **conflict_ratio**: the count of all conflicts divided by the number of elegible actions\n",
    "\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import qgrid\n",
    "from IPython.display import display, clear_output, Markdown as md\n",
    "from ipywidgets import Output, interact\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "from external.wikipedia import WikipediaDV, WikipediaAPI\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from visualization.conflicts_listener import ConflictsListener\n",
    "\n",
    "graph_description = \"\"\"\n",
    "In the above graph you can select the *date range* and *granularity* (yearly, monthly, daily) \n",
    "of the timeline (X-axis), and plot any of the following counts in the black and red lines:\n",
    " \n",
    "- **Conflict Score**: the sum of conflict scores of all actions divided by the number of elegible actions\n",
    "- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)\n",
    "- **Conflict Ratio**: the count of all conflicts divided by the number of elegible actions\n",
    "- **Number of Conflicts**: the total number of conflicts\n",
    "- **Total Elegible Actions**: the total number of elegible actions\n",
    "- **Total Conflict Time**: the sum of all the times (*time_diff_secs*) that has been taken by conflict actions\n",
    "- **Total Elegible Time**: the sum of all the times (*time_diff_secs*) that has been taken by elegible actions\n",
    "- **Time per Conflict Action**: average time of conflict actions\n",
    "- **Time per Elegible Action**: average time of elegible actions\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "def display_conflict_score(eleg_actions):\n",
    "    \"\"\"Render the interactive conflict timeline for a set of actions.\n",
    "\n",
    "    Args:\n",
    "        eleg_actions: DataFrame handed to ConflictsListener; its 'rev_time'\n",
    "            column is used to build the date-range slider.\n",
    "\n",
    "    Side effects:\n",
    "        Rebinds the global `listener` to a new ConflictsListener, displays the\n",
    "        total page conflict score taken from the global `calculator`, and\n",
    "        displays the interact controls.\n",
    "    \"\"\"\n",
    "    global listener\n",
    "\n",
    "    listener = ConflictsListener(eleg_actions)\n",
    "\n",
    "    # These labels are the dropdown values handed to listener.listen, so they\n",
    "    # must stay exactly as the listener expects them.\n",
    "    metrics = ['Conflict Score', 'Absolute Conflict Score',\n",
    "               'Conflict Ratio', 'Number of Conflicts',\n",
    "               'Total Elegible Actions',\n",
    "               'Total Conflict Time', 'Total Elegible Time',\n",
    "               'Time per Conflict Action', 'Time per Elegible Action']\n",
    "\n",
    "    display(md(f'*Total Page conflict score: {calculator.get_page_conflict_score()}*'))\n",
    "\n",
    "    # Visualization\n",
    "    interact(listener.listen,\n",
    "             _range=get_date_slider_from_datetime(eleg_actions['rev_time']),\n",
    "             granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily'),\n",
    "             black=Dropdown(options=metrics, value='Conflict Score'),\n",
    "             red=Dropdown(options=['None'] + metrics, value='None'))\n",
    "\n",
    "\n",
    "def select_editor(editor):\n",
    "    \"\"\"Look up `editor` on Wikipedia and show the evolution of its conflict score.\n",
    "\n",
    "    Args:\n",
    "        editor: editor identifier, e.g. the index label of a row selected in\n",
    "            the `editors_conflicts` grid.\n",
    "\n",
    "    Side effects:\n",
    "        Rebinds the globals `editor_inputname`, `the_editor` and (when the\n",
    "        editor is found) `editor_df`, persists the first two with %store, and\n",
    "        redraws the content of the `out` widget.\n",
    "    \"\"\"\n",
    "    global editor_df\n",
    "    global the_editor\n",
    "    global editor_inputname\n",
    "\n",
    "    editor_inputname = editor\n",
    "\n",
    "    # Only the id conversion is guarded. A failure of the remote lookup itself\n",
    "    # must not silently trigger a second lookup with a truncated identifier.\n",
    "    try:\n",
    "        editor_key = int(editor_inputname)\n",
    "    except (TypeError, ValueError):\n",
    "        # Not a numeric id: the lookup uses the name without its first two\n",
    "        # characters (presumably a prefix marking unregistered editors -- TODO confirm).\n",
    "        editor_key = editor_inputname[2:]\n",
    "\n",
    "    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))\n",
    "    the_editor = wikipedia_dv.get_editor(editor_key)\n",
    "\n",
    "    with out:\n",
    "        %store the_editor\n",
    "        %store editor_inputname\n",
    "\n",
    "        # Drop the previous selection (and the %store messages) before redrawing.\n",
    "        clear_output()\n",
    "        display(md(\"### Current Selection:\"))\n",
    "\n",
    "        if 'invalid' in the_editor:\n",
    "            display(f\"The editor {editor_inputname} was not found, try a different editor\")\n",
    "        else:\n",
    "            # display the data that will be passed to the next notebook\n",
    "            display(the_editor.to_frame('values'))\n",
    "            display(md(f\"#### Evolution of the Conflict Score of *{the_editor['name']}*\"))\n",
    "\n",
    "            editor_df = calculator.elegible_actions[\n",
    "                calculator.elegible_actions['editor'] == editor_inputname].copy()\n",
    "\n",
    "            display_conflict_score(editor_df)\n",
    "\n",
    "\n",
    "def on_selection_change(change):\n",
    "    \"\"\"Grid observer: analyze the editor of the first selected row.\n",
    "\n",
    "    Args:\n",
    "        change: change notification sent by the grid widget (not used).\n",
    "    \"\"\"\n",
    "    try:\n",
    "        selected_rows = qg_obj.get_selected_df()\n",
    "        if selected_rows.empty:\n",
    "            # Nothing is selected, so there is no editor to analyze.\n",
    "            return\n",
    "        select_editor(selected_rows.iloc[0].name)\n",
    "    except Exception as error:\n",
    "        print('Problem parsing the name. Execute the cell again and try a different editor.')\n",
    "        print(f'Details: {error!r}')\n",
    "\n",
    "\n",
    "# The output area must exist before the observer is registered, because\n",
    "# select_editor writes into it.\n",
    "out = Output()\n",
    "\n",
    "qgrid.set_grid_option('maxVisibleRows', 5)\n",
    "qg_obj = qgrid.show_grid(editors_conflicts)\n",
    "qg_obj.observe(on_selection_change, names=['_selected_rows'])\n",
    "\n",
    "display(md(\"### Select one editor (row) to continue the demo:\"))\n",
    "display(md('**Recommendation:** select an editor with *many conflicts* and *mid-high conflict score*'))\n",
    "display(qg_obj)\n",
    "display(out)\n",
    "display(md(graph_description))\n",
    "select_editor(editor_inputname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "\n",
    "display(md(\"---\"))\n",
    "display(md(\"# B. Actions per page\"))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "display(md(\"\"\"The following table shows the total number of actions (insertions + deletions + reinsertions) per month \n",
    "(`year_month` column), and page (`page_id` column).\"\"\"))\n",
    "display(md(\"\"\"**Columns description:**\n",
    "- **total**: total number of actions (insertions, deletions, and reinsertions)\n",
    "- **total_surv_48h**: total number of actions that survived at least 48 hours\n",
    "- **total_persistent**: total number of actions that survived until, at least, the end of the month\n",
    "- **total_stopword_count**: total number of actions that were performed in stop words\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import qgrid\n",
    "from wikiwho_wrapper import WikiWho\n",
    "\n",
    "wikiwho = WikiWho(lng='en')\n",
    "agg_actions = wikiwho.dv.edit_persistence(editor_id=the_editor.userid)\n",
    "\n",
    "# convert to datetime\n",
    "agg_actions['year_month'] = pd.to_datetime(agg_actions['year_month'])\n",
    "\n",
    "# define total columns\n",
    "total_columns = ['total', 'total_surv_48h', 'total_persistent', 'total_stopword_count']\n",
    "\n",
    "# add columns with the total actions\n",
    "# The three label slices are added position by position, so each of them has\n",
    "# to contain one column per entry of total_columns, in the same order.\n",
    "total_values = (\n",
    "    agg_actions.loc[:, 'adds':'adds_stopword_count'].values\n",
    "    + agg_actions.loc[:, 'dels':'dels_stopword_count'].values\n",
    "    + agg_actions.loc[:, 'reins':'reins_stopword_count'].values\n",
    ")\n",
    "agg_actions = agg_actions.join(\n",
    "    pd.DataFrame(total_values, index=agg_actions.index, columns=total_columns))\n",
    "\n",
    "qgrid.set_grid_option('maxVisibleRows', 5)\n",
    "qgrid.show_grid(agg_actions[['year_month', 'page_id'] + total_columns])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "from ipywidgets import interact, fixed\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from visualization.actions_listener import ActionsListener\n",
    "\n",
    "display(md(\"\"\"## C. Visualization of editor actions per month\"\"\"))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "display(md(\"\"\"In the following graph you can select the *date range* and *granularity* (yearly, monthly) \n",
    "of the timeline (X-axis), and plot any of the following counts in the black, red, blue and green lines:\n",
    " \n",
    "- **adds**: number of first-time insertions\n",
    "- **adds_surv_48h**: number of insertions for the first time that survived at least 48 hours\n",
    "- **adds_persistent**: number of insertions for the first time that survived until, at least, the end of the month\n",
    "- **adds_stopword_count**: number of insertions that were stop words\n",
    "- **dels**: number of deletions\n",
    "- **dels_surv_48h**: number of deletions that were not reinserted in the next 48 hours\n",
    "- **dels_persistent**: number of deletions that were not reinserted until, at least, the end of the month\n",
    "- **dels_stopword_count**: number of deletions that were stop words\n",
    "- **reins**: number of reinsertions\n",
    "- **reins_surv_48h**: number of reinsertions that survived at least 48 hours\n",
    "- **reins_persistent**: number of reinsertions that survived until the end of the month\n",
    "- **reins_stopword_count**: number of reinsertions that were stop words\n",
    "\"\"\"))\n",
    "\n",
    "# Listener\n",
    "# Kept under its own name: display_conflict_score rebinds the global\n",
    "# `listener` every time an editor is selected in the grid.\n",
    "actions_listener = ActionsListener(agg_actions)\n",
    "\n",
    "# Dropdown options: the total columns first, then the per-kind columns.\n",
    "actions = (agg_actions.loc[:, 'total':'total_stopword_count'].columns.append(\n",
    "    agg_actions.loc[:, 'adds':'reins_stopword_count'].columns)).values.tolist()\n",
    "\n",
    "# Visualization (the trailing semicolon hides the repr of the value returned by interact)\n",
    "interact(actions_listener.listen,\n",
    "         _range=get_date_slider_from_datetime(agg_actions['year_month']),\n",
    "         editor=fixed('All'),\n",
    "         granularity=Dropdown(options=['Yearly', 'Monthly'], value='Monthly'),\n",
    "         black=Dropdown(options=actions, value='total'),\n",
    "         red=Dropdown(options=['None'] + actions, value='total_surv_48h'),\n",
    "         green=Dropdown(options=['None'] + actions, value='None'),\n",
    "         blue=Dropdown(options=['None'] + actions, value='None'));"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import HTML, display\n",
    "from utils.notebooks import get_next_notebook\n",
    "\n",
    "# Render the text as a link to the next notebook.\n",
    "# NOTE(review): assumes get_next_notebook() takes no arguments and returns the\n",
    "# URL of the next notebook -- confirm against utils.notebooks.\n",
    "display(HTML(f'<a href=\"{get_next_notebook()}\" target=\"_blank\">Go to next workbook</a>'))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}