{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import pickle\n",
    "\n",
    "from IPython.display import display, Markdown as md\n",
    "\n",
    "# Restore the objects persisted with %store by earlier notebooks.\n",
    "%store -r the_page\n",
    "%store -r the_editor\n",
    "%store -r editor_inputname\n",
    "%store -r calculator\n",
    "%store -r editors_conflicts\n",
    "\n",
    "# Every name the rest of this notebook relies on.\n",
    "REQUIRED_NAMES = ('the_page', 'the_editor', 'editor_inputname',\n",
    "                  'calculator', 'editors_conflicts')\n",
    "\n",
    "\n",
    "def load_default(name):\n",
    "    \"\"\"Return the object pickled in data/<name>.p and close the file afterwards.\n",
    "\n",
    "    NOTE: unpickling can execute arbitrary code, so only trusted files\n",
    "    from the local data/ folder may be loaded through this helper.\n",
    "    \"\"\"\n",
    "    with open(f\"data/{name}.p\", 'rb') as pickle_file:\n",
    "        return pickle.load(pickle_file)\n",
    "\n",
    "\n",
    "# If any object could not be restored, the defaults are loaded for ALL of\n",
    "# them, so the five objects are always replaced together.\n",
    "if any(name not in globals() for name in REQUIRED_NAMES):\n",
    "    print(\"Loading default data...\")\n",
    "    the_page = load_default('the_page')\n",
    "    the_editor = load_default('the_editor')\n",
    "    editor_inputname = load_default('editor_inputname')\n",
    "    calculator = load_default('calculator')\n",
    "    editors_conflicts = load_default('editors_conflicts')\n",
    "\n",
    "display(md(\"---\"))\n",
    "display(md(f\"# A. Select an editor to analyze their conflicting editors\"))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))\n",
    "display(md(\"The table below presents the conflict score and other related metrics per editor \"\n",
    "           f\"(*editor_id* and *editor* column). Select one editor of the page \\\"{the_page['title']}\\\" to analyze \"\n",
    "           \"the general Wikipedia metadata of the editor. At the end you can select created pages of the editor \"\n",
    "           \"in order to restart the analysis in a different page:\"))\n",
    "\n",
    "display(md(\"\"\"\n",
    "- **conflict_n**: the total number of conflicts\n",
    "- **conflict**: the sum of conflict scores of all actions (without division)\n",
    "- **actions**: the total number of actions performed by the editor\n",
    "- **conflict_score**: the sum of conflict scores of all actions divided by the number of elegible actions\n",
    "- **conflict_ratio**: the count of all conflicts divided by the number of elegible actions\n",
    "\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, clear_output, Markdown as md\n",
    "from ipywidgets import Output, interact\n",
    "from ipywidgets.widgets import Dropdown\n",
    "import qgrid\n",
    "\n",
    "from external.wikipedia import WikipediaDV, WikipediaAPI\n",
    "from utils.notebooks import get_date_slider_from_datetime\n",
    "from visualization.conflicts_listener import ConflictsListener\n",
    "\n",
    "graph_description = \"\"\"\n",
    "In the above graph you can select the *date range* and *granularity* (yearly, monthly, daily)\n",
    "of the timeline (X-axis), and plot any of the following counts in the black and red lines:\n",
    "\n",
    "- **Conflict Score**: the sum of conflict scores of all actions divided by the number of elegible actions\n",
    "- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)\n",
    "- **Conflict Ratio**: the count of all conflicts divided by the number of elegible actions\n",
    "- **Number of Conflicts**: the total number of conflicts\n",
    "- **Total Elegible Actions**: the total number of elegible actions\n",
    "- **Total Conflict Time**: the sum of all the times (*time_diff_secs*) that has been taken by conflict actions\n",
    "- **Total Elegible Time**: the sum of all the times (*time_diff_secs*) that has been taken by elegible actions\n",
    "- **Time per Conflict Action**: average time of conflict actions\n",
    "- **Time per Elegible Action**: average time of elegible actions\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "def display_conflict_score(eleg_actions):\n",
    "    \"\"\"Show the page conflict score and an interactive timeline of conflict metrics.\n",
    "\n",
    "    eleg_actions is handed to ConflictsListener; its 'rev_time' column\n",
    "    defines the range of the date slider.\n",
    "    \"\"\"\n",
    "    # `listener` is (re)bound at module level on every call.\n",
    "    global listener\n",
    "    listener = ConflictsListener(eleg_actions)\n",
    "\n",
    "    # These labels are passed verbatim to listener.listen, so their\n",
    "    # spelling must not be changed here.\n",
    "    metrics = ['Conflict Score', 'Absolute Conflict Score',\n",
    "               'Conflict Ratio', 'Number of Conflicts',\n",
    "               'Total Elegible Actions',\n",
    "               'Total Conflict Time', 'Total Elegible Time',\n",
    "               'Time per Conflict Action', 'Time per Elegible Action']\n",
    "\n",
    "    display(md(f'*Total Page conflict score: {calculator.get_page_conflict_score()}*'))\n",
    "\n",
    "    # Visualization\n",
    "    interact(listener.listen,\n",
    "             _range=get_date_slider_from_datetime(eleg_actions['rev_time']),\n",
    "             granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily'),\n",
    "             black=Dropdown(options=metrics, value='Conflict Score'),\n",
    "             red=Dropdown(options=['None'] + metrics, value='None'))\n",
    "\n",
    "\n",
    "def select_editor(editor):\n",
    "    \"\"\"Look up `editor` in Wikipedia, persist the selection and redraw its conflict timeline.\n",
    "\n",
    "    Rebinds the globals the_editor, editor_inputname and (when the editor\n",
    "    is found) editor_df, and stores the first two with %store.\n",
    "    All output is rendered inside the `out` widget.\n",
    "    \"\"\"\n",
    "    global editor_df\n",
    "    global the_editor\n",
    "    global editor_inputname\n",
    "\n",
    "    editor_inputname = editor\n",
    "\n",
    "    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))\n",
    "    try:\n",
    "        the_editor = wikipedia_dv.get_editor(int(editor_inputname))\n",
    "    except Exception:\n",
    "        # Not an integer id (or the lookup failed): retry without the first two\n",
    "        # characters -- presumably a 2-character prefix on non-numeric ids; TODO confirm.\n",
    "        the_editor = wikipedia_dv.get_editor(editor_inputname[2:])\n",
    "\n",
    "    with out:\n",
    "        %store the_editor\n",
    "        %store editor_inputname\n",
    "\n",
    "        # Also wipes the messages printed by %store above.\n",
    "        clear_output()\n",
    "        display(md(\"### Current Selection:\"))\n",
    "\n",
    "        if 'invalid' in the_editor:\n",
    "            display(f\"The editor {editor_inputname} was not found, try a different editor\")\n",
    "        else:\n",
    "            # display the data that will be passed to the next notebook\n",
    "            display(the_editor.to_frame('values'))\n",
    "            display(md(f\"#### Evolution of the Conflict Score of *{the_editor['name']}*\"))\n",
    "\n",
    "            editor_df = calculator.elegible_actions[\n",
    "                calculator.elegible_actions['editor'] == editor_inputname].copy()\n",
    "\n",
    "            display_conflict_score(editor_df)\n",
    "\n",
    "\n",
    "def on_selection_change(change):\n",
    "    \"\"\"qgrid observer: analyze the editor of the first selected row.\"\"\"\n",
    "    try:\n",
    "        selected = qg_obj.get_selected_df()\n",
    "        if selected.empty:\n",
    "            # The selection was cleared; there is nothing to analyze.\n",
    "            return\n",
    "        select_editor(selected.iloc[0].name)\n",
    "    except Exception:\n",
    "        print('Problem parsing the name. Execute the cell again and try a different editor.')\n",
    "\n",
    "\n",
    "qgrid.set_grid_option('maxVisibleRows', 5)\n",
    "qg_obj = qgrid.show_grid(editors_conflicts)\n",
    "qg_obj.observe(on_selection_change, names=['_selected_rows'])\n",
    "\n",
    "display(md(\"### Select one editor (row) to continue the demo:\"))\n",
    "display(md('**Recommendation:** select an editor with *many conflicts* and *mid-high conflict score*'))\n",
    "display(qg_obj)\n",
    "# select_editor renders into this widget, so it must exist before the first call.\n",
    "out = Output()\n",
    "display(out)\n",
    "display(md(graph_description))\n",
    "select_editor(editor_inputname)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md(f\"# B. Basic editor information\"))\n",
    "display(md(f\"Provided by Wikipedia\"))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "display(md(f\"\"\"The following is information about the editor directly available in Wikipedia.\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "the_editor.to_frame('value')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md(f\"# C. Modified pages of an editor\"))\n",
    "display(md(f\"Provided through the Xtools API\"))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "display(md(f\"\"\"The following is some metadata about the creation and deletion\n",
    "of pages in Wikipedia by the editor.\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, clear_output, Markdown as md\n",
    "\n",
    "from external.xtools import XtoolsAPI, XtoolsDV\n",
    "\n",
    "xtools_api = XtoolsAPI(project = 'en.wikipedia.org')\n",
    "xtools_dv = XtoolsDV(xtools_api)\n",
    "\n",
    "try:\n",
    "    editor_info = xtools_dv.get_modified_pages_counts_per_editor(the_editor['name'])\n",
    "    display(editor_info.to_frame('value'))\n",
    "except Exception:\n",
    "    # Best effort: any failure of the request is reported as \"no pages\".\n",
    "    clear_output()\n",
    "    display(md(f'**There are no modified pages by this editor.**'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "display(md(\"---\"))\n",
    "display(md(f\"# D. Select one page created by an editor\"))\n",
    "display(md(f\"Provided through the Xtools API\"))\n",
    "display(md(f\"***Editor: {the_editor['name']}***\"))\n",
    "display(md(f\"\"\"The following table shows a list of created pages by the editor with some general \n",
    "information about the page:\n",
    "- *page_title*: title of the page\n",
    "- *page_len*: an estimated amount of words in the page\n",
    "- *rev_id*: the id of the last revision\n",
    "- *rev_len*: the number of revisions made on that page\n",
    "- *rev_timestamp*: the timestamp of the last revision (last modification)\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import HTML, display, clear_output, Markdown as md\n",
    "from ipywidgets import Output\n",
    "import qgrid\n",
    "\n",
    "from external.wikipedia import WikipediaDV, WikipediaAPI\n",
    "from external.xtools import XtoolsAPI, XtoolsDV\n",
    "from utils.notebooks import get_notebook_by_number\n",
    "from visualization.conflicts_listener import ConflictsListener\n",
    "\n",
    "xtools_api = XtoolsAPI(project = 'en.wikipedia.org')\n",
    "xtools_dv = XtoolsDV(xtools_api)\n",
    "\n",
    "# Reset on every run so a failed request can never fall back to the\n",
    "# table left behind by a previous execution of this cell.\n",
    "created_pages = None\n",
    "try:\n",
    "    created_pages = xtools_dv.get_created_pages_per_editor(the_editor['name'])\n",
    "except Exception:\n",
    "    # Best effort: any failure of the request is reported as \"no pages\".\n",
    "    clear_output()\n",
    "    display(md(f'**There are no created pages by this editor.**'))\n",
    "    display(HTML(f'Go to next workbook'))\n",
    "\n",
    "\n",
    "def select_page(page):\n",
    "    \"\"\"Look up `page` in Wikipedia, persist it with %store and show it in `pages_out`.\n",
    "\n",
    "    Rebinds the global the_page.\n",
    "    \"\"\"\n",
    "    global the_page\n",
    "    page_inputname = page\n",
    "\n",
    "    wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))\n",
    "    try:\n",
    "        the_page = wikipedia_dv.get_page(int(page_inputname))\n",
    "    except Exception:\n",
    "        # Not an integer id (or the lookup failed): retry with the raw value.\n",
    "        the_page = wikipedia_dv.get_page(page_inputname)\n",
    "\n",
    "    with pages_out:\n",
    "        %store the_page\n",
    "        # Also wipes the message printed by %store above.\n",
    "        clear_output()\n",
    "        display(md(\"### Current Selection:\"))\n",
    "\n",
    "        if 'invalid' in the_page:\n",
    "            display(f\"The page {page_inputname} was not found, try a different page\")\n",
    "        else:\n",
    "            # display the data that will be passed to the next notebook\n",
    "            display(the_page.to_frame('values'))\n",
    "            display(HTML(f'Go to next workbook'))\n",
    "\n",
    "\n",
    "def on_page_selection_change(change):\n",
    "    \"\"\"qgrid observer: select the page of the first selected row.\"\"\"\n",
    "    try:\n",
    "        selected = pages_grid.get_selected_df()\n",
    "        if selected.empty:\n",
    "            # The selection was cleared; there is nothing to select.\n",
    "            return\n",
    "        select_page(selected.iloc[0].page_title)\n",
    "    except Exception:\n",
    "        print('Problem parsing the name. Execute the cell again and try a different page.')\n",
    "        display(HTML(f'Go to next workbook'))\n",
    "\n",
    "\n",
    "# The grid, its output widget and its observer use names of their own so the\n",
    "# editor grid of section A (qg_obj / out / on_selection_change) keeps working.\n",
    "if created_pages is not None:\n",
    "    pages_grid = qgrid.show_grid(created_pages[['page_title', 'page_len', 'rev_id', 'rev_len', 'rev_timestamp']])\n",
    "    pages_grid.observe(on_page_selection_change, names=['_selected_rows'])\n",
    "\n",
    "    display(md(\"### Select one page row for the next notebook:\"))\n",
    "    display(pages_grid)\n",
    "    pages_out = Output()\n",
    "    display(pages_out)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}