{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "%store -r the_page\n",
    "%store -r agg_actions\n",
    "%store -r calculator\n",
    "%store -r editors_conflicts\n",
    "\n",
    "# If any input is still undefined after `%store -r`, fall back to the pickled\n",
    "# default data under data/.\n",
    "if ('the_page' not in locals() or\n",
    "        'agg_actions' not in locals() or\n",
    "        'calculator' not in locals() or\n",
    "        'editors_conflicts' not in locals()):\n",
    "    import pickle\n",
    "\n",
    "    def _load_default(path):\n",
    "        \"\"\"Unpickle the object stored at `path`, closing the file afterwards.\"\"\"\n",
    "        # NOTE(review): pickle.load can run arbitrary code; only use trusted files.\n",
    "        with open(path, 'rb') as pickled_file:\n",
    "            return pickle.load(pickled_file)\n",
    "\n",
    "    print(\"Loading default data...\")\n",
    "    the_page = _load_default(\"data/the_page.p\")\n",
    "    agg_actions = _load_default(\"data/agg_actions.p\")\n",
    "    calculator = _load_default(\"data/calculator.p\")\n",
    "    editors_conflicts = _load_default(\"data/editors_conflicts.p\")\n",
    "\n",
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md(f\"# A. Select an editor to analyze their activity in the context of ***{the_page['title']}***\"))\n",
    "\n",
    "display(md(\"\"\"The table below presents the conflict score and other related metrics per editor \n",
    "(*editor_id* and *editor* column):\n",
    "\n",
    "- **conflict_n**: the total number of conflicts\n",
    "- **conflict**: the sum of conflict scores of all actions (without division)\n",
    "- **actions**: the total number of actions performed by the editor\n",
    "- **conflict_score**: the sum of conflict scores of all actions divided by the number of eligible actions\n",
    "- **conflict_ratio**: the count of all conflicts divided by the number of eligible actions\n",
    "\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, clear_output, Markdown as md\n",
    "from ipywidgets import Output, interact\n",
    "from ipywidgets.widgets import Dropdown\n",
    "import qgrid\n",
    "\n",
    "from visualization.conflicts_listener import ConflictsListener\n",
    "from external.wikipedia import WikipediaDV, WikipediaAPI\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "\n",
    "graph_description = \"\"\"\n",
    "In the above graph you can select the *date range* and *granularity* (yearly, monthly) \n",
    "of the timeline (X-axis), and plot any of the following counts in the black and red lines:\n",
    "\n",
    "- **Conflict Score**: the sum of conflict scores of all actions divided by the number of eligible actions\n",
    "- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)\n",
    "- **Conflict Ratio**: the count of all conflicts divided by the number of eligible actions\n",
    "- **Number of Conflicts**: the total number of conflicts\n",
    "- **Total Elegible Actions**: the total number of eligible actions\n",
    "\"\"\"\n",
    "\n",
    "def display_conflict_score(editor_df):\n",
    "    \"\"\"Show the conflict score of `editor_df` and an interactive conflict timeline.\n",
    "\n",
    "    The displayed score is `conflict.sum() / elegibles.sum()` over `editor_df`.\n",
    "    The `ConflictsListener` built here is stored in the global `listener`.\n",
    "    \"\"\"\n",
    "    global listener\n",
    "\n",
    "    listener = ConflictsListener(editor_df)\n",
    "\n",
    "    # These labels are handed to the listener through the dropdowns below,\n",
    "    # so their spelling must stay exactly as is.\n",
    "    metrics = ['Conflict Score', 'Absolute Conflict Score',\n",
    "               'Conflict Ratio', 'Number of Conflicts',\n",
    "               'Total Elegible Actions']\n",
    "    display(md(f'*Total Page conflict score: {editor_df.conflict.sum() / editor_df.elegibles.sum()}*'))\n",
    "\n",
    "    # Visualization\n",
    "    interact(listener.listen,\n",
    "             _range=get_date_slider_from_datetime(editor_df['year_month']),\n",
    "             granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily'),\n",
    "             black=Dropdown(options=metrics, value='Conflict Score'),\n",
    "             red=Dropdown(options=['None'] + metrics, value='None'))\n",
    "\n",
    "def select_editor(editor):\n",
    "    \"\"\"Look up `editor`, store the selection and render its conflict view in `out`.\n",
    "\n",
    "    Sets the globals `editor_inputname`, `the_editor` and (when the editor is\n",
    "    found) `editor_df`; `the_editor` and `editor_inputname` are also saved\n",
    "    with `%store`.\n",
    "    \"\"\"\n",
    "    global editor_df\n",
    "    global the_editor\n",
    "    global editor_inputname\n",
    "\n",
    "    editor_inputname = editor\n",
    "\n",
    "    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))\n",
    "    # Only a failed integer parse selects the fallback key (the input without\n",
    "    # its first two characters). Errors raised by the lookup itself propagate\n",
    "    # instead of silently retrying with a truncated id.\n",
    "    try:\n",
    "        editor_key = int(editor_inputname)\n",
    "    except ValueError:\n",
    "        editor_key = editor_inputname[2:]\n",
    "    the_editor = wikipedia_dv.get_editor(editor_key)\n",
    "\n",
    "    with out:\n",
    "        %store the_editor\n",
    "        %store editor_inputname\n",
    "\n",
    "        clear_output()\n",
    "        display(md(\"### Current Selection:\"))\n",
    "        if 'invalid' in the_editor:\n",
    "            display(f\"The editor {editor_inputname} was not found, try a different editor\")\n",
    "        else:\n",
    "            # display the data that will be passed to the next notebook\n",
    "            display(the_editor.to_frame('values'))\n",
    "            display(md(f\"#### Evolution of the Conflict Score of *{the_editor['name']}*\"))\n",
    "\n",
    "            editor_df = agg_actions[agg_actions['editor_id'] == the_editor['userid']].copy()\n",
    "\n",
    "            display_conflict_score(editor_df)\n",
    "\n",
    "\n",
    "def on_selection_change(change):\n",
    "    \"\"\"qgrid observer: select the editor of the first selected row.\"\"\"\n",
    "    try:\n",
    "        select_editor(qg_obj.get_selected_df().iloc[0].name)\n",
    "    except Exception as exc:\n",
    "        # Keep the grid usable after a failed selection, but show the cause\n",
    "        # instead of hiding every error behind the same generic message.\n",
    "        print('Problem parsing the name. Execute the cell again and try a different editor.')\n",
    "        print(f'({type(exc).__name__}: {exc})')\n",
    "\n",
    "qgrid.set_grid_option('maxVisibleRows', 5)\n",
    "qg_obj = qgrid.show_grid(editors_conflicts)\n",
    "qg_obj.observe(on_selection_change, names=['_selected_rows'])\n",
    "\n",
    "display(md(\"### Select one editor (row) to continue the demo:\"))\n",
    "display(md('**Recommendation:** select an editor with *many conflicts* and *mid-high conflict score*'))\n",
    "display(qg_obj)\n",
    "out = Output()\n",
    "display(out)\n",
    "display(md(graph_description))\n",
    "\n",
    "# Pre-select the first editor whose index label is not 0.\n",
    "# NOTE(review): the original comment said this skips editors starting with\n",
    "# '0|' -- confirm that the `!= 0` test achieves that for this index.\n",
    "for ed in editors_conflicts.index:\n",
    "    if ed != 0:\n",
    "        select_editor(ed)\n",
    "        break\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ipywidgets import widgets\n",
    "from IPython.display import display, Javascript\n",
    "\n",
    "def run_below(ev):\n",
    "    \"\"\"Button callback: ask the notebook front end to execute the cells below.\"\"\"\n",
    "    display(Javascript('IPython.notebook.execute_cells_below()'))\n",
    "\n",
    "# Widget sizing goes through `layout`; `min_width` is not a Button keyword.\n",
    "button = widgets.Button(description=\"Refresh the rest of the notebook below\",\n",
    "                        button_style='info',\n",
    "                        layout=widgets.Layout(min_width='500px'))\n",
    "button.on_click(run_below)\n",
    "display(button)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md(\"# B. Activity of editor on a page\"))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "display(md(\"\"\"In the following graph you can select the *date range* and *granularity* (yearly, monthly) \n",
    "of the timeline (X-axis), and plot any of the following counts in the black, red, blue and green lines:\n",
    "\n",
    "- **adds**: number of first-time insertions\n",
    "- **adds_surv_48h**: number of insertions for the first time that survived at least 48 hours\n",
    "- **adds_persistent**: number of insertions for the first time that survived until, at least, the end of the month\n",
    "- **adds_stopword_count**: number of insertions that were stop words\n",
    "- **dels**: number of deletions\n",
    "- **dels_surv_48h**: number of deletions that were not reinserted in the next 48 hours\n",
    "- **dels_persistent**: number of deletions that were not reinserted until, at least, the end of the month\n",
    "- **dels_stopword_count**: number of deletions that were stop words\n",
    "- **reins**: number of reinsertions\n",
    "- **reins_surv_48h**: number of reinsertions that survived at least 48 hours\n",
    "- **reins_persistent**: number of reinsertions that survived until the end of the month\n",
    "- **reins_stopword_count**: number of reinsertions that were stop words\n",
    "\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "editor_agg_actions = agg_actions[agg_actions['editor_id']==the_editor.userid]\n",
    "\n",
    "#Listener\n",
    "from visualization.actions_listener import ActionsListener\n",
    "listener = ActionsListener(editor_agg_actions)\n",
    "# Dropdown options: the column labels from 'total' to 'total_stopword_count',\n",
    "# followed by those from 'adds' to 'reins_stopword_count'.\n",
    "actions = (editor_agg_actions.loc[:,'total':'total_stopword_count'].columns.append(\n",
    "    editor_agg_actions.loc[:,'adds':'reins_stopword_count'].columns)).values.tolist()\n",
    "\n",
    "# Visualization\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from ipywidgets import interact, fixed\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "# Build the date slider once and hand that same widget to interact().\n",
    "_range = get_date_slider_from_datetime(editor_agg_actions['year_month'])\n",
    "interact(listener.listen,\n",
    "         _range=_range,\n",
    "         editor=fixed('All'),\n",
    "         granularity=Dropdown(options=['Yearly', 'Monthly'], value='Monthly'),\n",
    "         black=Dropdown(options=actions, value='total'),\n",
    "         red=Dropdown(options=['None'] + actions, value='total_surv_48h'),\n",
    "         green=Dropdown(options=['None'] + actions, value='None'),\n",
    "         blue=Dropdown(options=['None'] + actions, value='None'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md(\"# C. Tokens that enter into conflict with other editors\"))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "\n",
    "display(md(\"\"\"The WordCloud displays the most common token strings (words) that a particular editor \n",
    "inserted or deleted and that enter into conflict with other editors. The size of the token string in \n",
    "the WordCloud indicates frequency of actions.\n",
    "\n",
    "In the controls, you can select the *date range*, the type of *action* (insertion or deletion), and the \n",
    "*source*. The *source* can be any of the following:\n",
    "\n",
    "- **Only Conflicts**: use only the actions that are in conflict.\n",
    "- **Elegible Actions**: use only the actions that can potentially enter into conflict, i.e. actions \n",
    "that have occurred at least twice, e.g. the token x has been inserted twice (which necessarily implies \n",
    "it was removed once), the token x has been deleted twice (which necessarily implies it was inserted twice) \n",
    "- **All Actions**: use all tokens regardless of conflict\n",
    "\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _actions_of_editor(actions_df, editor):\n",
    "    \"\"\"Return the rows of `actions_df` whose 'editor' column equals `editor`.\"\"\"\n",
    "    return actions_df[actions_df['editor'] == editor]\n",
    "\n",
    "selected_editor = str(editor_inputname)\n",
    "sources = {\n",
    "    'All actions': _actions_of_editor(calculator.all_actions, selected_editor),\n",
    "    'Elegible Actions': _actions_of_editor(calculator.elegible_actions, selected_editor),\n",
    "    'Only Conflicts': _actions_of_editor(calculator.conflicts, selected_editor),\n",
    "}\n",
    "\n",
    "\n",
    "# listener\n",
    "from visualization.wordcloud_listener import WCListener\n",
    "\n",
    "listener = WCListener(sources)\n",
    "\n",
    "# visualization\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from ipywidgets import interact, fixed\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "interact(listener.listen,\n",
    "         _range=get_date_slider_from_datetime(calculator.all_actions['rev_time']),\n",
    "         source=Dropdown(options=list(listener.sources.keys()), value='Only Conflicts'),\n",
    "         action=Dropdown(options=['Both', 'Just Insertions', 'Just Deletions'], value='Both'),\n",
    "         editor=fixed('All'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md(\"# D. Tokens in the page owned by the editor\"))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "\n",
    "\n",
    "display(md(\"\"\"The following time line shows the tokens owned by this editor. The ownership \n",
    "(or authorship) is based on the WikiWho algorithm (\n",
    "[Flöck & Acosta, 2014](http://wwwconference.org/proceedings/www2014/proceedings/p843.pdf)).\n",
    "The graph shows that it is possible to recover the number of tokens that an editor owned at any\n",
    "point in time. The time points are selected based on instances in which insertions or deletions\n",
    "were performed on the editor's tokens. However, notice that the percentages of ownership might\n",
    "vary because percentages are relative to insertions or deletions of tokens of other editors.\n",
    "This is why the current date is also included in the graph.\n",
    "\n",
    "In the controls, you can select the *date range*, the *granularity* (Daily, Monthly, Yearly), and \n",
    "the *metric* that will be plotted (Tokens Owned or Tokens Owned (%)).\n",
    "\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from visualization.owned_listener import OwnedListener\n",
    "all_actions = calculator.all_actions\n",
    "listener = OwnedListener(all_actions, str(editor_inputname))\n",
    "traces = ['Tokens Owned', 'Tokens Owned (%)']\n",
    "\n",
    "# Visualization\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from ipywidgets import interact\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "interact(listener.listen,\n",
    "         _range=get_date_slider_from_datetime(listener.days),\n",
    "         granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Monthly'),\n",
    "         trace=Dropdown(options=traces, value='Tokens Owned (%)', description='metric'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import HTML\n",
    "from utils.notebooks import get_next_notebook, get_previous_notebook\n",
    "\n",
    "editor_actions = calculator.elegible_actions[calculator.elegible_actions['editor']==str(editor_inputname)]\n",
    "\n",
    "if len(editor_actions) > 0:\n",
    " display(HTML(f'Go to next workbook'))\n",
    "else:\n",
" display(HTML('

This editor has no actions. Please select an editor that has '\n", " 'actions to continue to the next notebook.

'))" ] } ], "metadata": { "kernel_info": { "name": "python3" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3" }, "nteract": { "version": "0.14.4" } }, "nbformat": 4, "nbformat_minor": 2 }