{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"%store -r the_page\n",
"%store -r the_editor\n",
"%store -r editor_inputname\n",
"%store -r calculator\n",
"%store -r editors_conflicts\n",
"\n",
"if ('the_page' not in locals() or \n",
" 'the_editor' not in locals() or \n",
" 'editor_inputname' not in locals() or \n",
" 'calculator' not in locals() or \n",
" 'editors_conflicts' not in locals()):\n",
" \n",
" import pickle\n",
" print(\"Loading default data...\")\n",
" the_page = pickle.load(open(\"data/the_page.p\",'rb'))\n",
" the_editor = pickle.load(open(\"data/the_editor.p\",'rb'))\n",
" editor_inputname = pickle.load(open(\"data/editor_inputname.p\",'rb'))\n",
" calculator = pickle.load(open(\"data/calculator.p\",'rb'))\n",
" editors_conflicts = pickle.load(open(\"data/editors_conflicts.p\",'rb'))\n",
"\n",
"the_editor.to_frame('value')\n",
"\n",
"from IPython.display import display, Markdown as md\n",
"display(md(\"---\"))\n",
"display(md(f\"# A. Select an editor to analyze their conflicting editors\"))\n",
"display(md(f\"***Page: {the_page['title']}***\"))\n",
"display(md(\"The table below presents the conflict score and other related metrics per editor \"\n",
"f\"(*editor_id* and *editor* column). Select one editor of the page \\\"{the_page['title']}\\\" to analyze \" \n",
"\"the general Wikipedia metadata of the editor. At the end you can select created pages of the editor \"\n",
"\"in order to restart the analysis in a different page:\"))\n",
"\n",
"display(md(\"\"\"\n",
"- **conflict_n**: the total number of conflicts\n",
"- **conflict**: the sum of conflict scores of all actions (without division)\n",
"- **actions**: the total number of actions performed by the editor\n",
"- **conflict_score**: the sum of conflict scores of all actions divided by the number of elegible actions\n",
"- **conflict_ratio**: the count of all conflicts divided by the number of elegible actions\n",
"\"\"\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"from external.wikipedia import WikipediaDV, WikipediaAPI\n",
"\n",
"graph_description = \"\"\"\n",
"In the above graph you can select the *date range* and *granularity* (yearly, montly) \n",
"of the timeline (X-axis), and plot any of the following counts in the black and red lines:\n",
" \n",
"- **Conflict Score**: the sum of conflict scores of all actions divided by the number of elegible actions\n",
"- **Absolute Conflict Score**: the sum of conflict scores of all actions (without division)\n",
"- **Conflict Ratio**: the count of all conflicts divided by the number of elegible actions\n",
"- **Number of Conflicts**: the total number of conflicts\n",
"- **Total Elegible Actions**: the total number of elegible actions\n",
"- **Total Conflict Time**: the sum of all the times (*time_diff_secs*) that has been taken by conflict actions\n",
"- **Total Elegible Time**: the sum of all the times (*time_diff_secs*) that has been taken by elegible actions\n",
"- **Time per Conflict Action**: average time of conflict actions\n",
"- **Time per Elegible Action**: average time of elegible actions\n",
"\"\"\"\n",
"\n",
"def display_conflict_score(eleg_actions):\n",
" global listener\n",
" \n",
" from visualization.calculator_listener import ConflictCalculatorListener\n",
" listener = ConflictCalculatorListener(eleg_actions)\n",
"\n",
" metrics = ['Conflict Score', 'Absolute Conflict Score', \n",
" 'Conflict Ratio', 'Number of Conflicts', \n",
" 'Total Elegible Actions', \n",
" 'Total Conflict Time', 'Total Elegible Time', \n",
" 'Time per Conflict Action', 'Time per Elegible Action']\n",
"\n",
" display(md(f'*Total Page conflict score: {calculator.get_page_conflict_score()}*'))\n",
"\n",
" # Visualization\n",
" from utils.notebooks import get_date_slider_from_datetime\n",
" from ipywidgets import interact\n",
" from ipywidgets.widgets import Dropdown\n",
"\n",
" interact(listener.listen,\n",
" _range = get_date_slider_from_datetime(eleg_actions['rev_time']),\n",
" granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Daily'),\n",
" black=Dropdown(options=metrics, value='Conflict Score'),\n",
" red=Dropdown(options= ['None'] + metrics, value='None'))\n",
"\n",
"def select_editor(editor):\n",
" global editor_df\n",
" global the_editor\n",
" global editor_inputname\n",
"\n",
" editor_inputname=editor\n",
" \n",
" wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))\n",
" try:\n",
" the_editor = wikipedia_dv.get_editor(int(editor_inputname))\n",
" except:\n",
" the_editor = wikipedia_dv.get_editor(editor_inputname[2:])\n",
"\n",
" with out:\n",
" %store the_editor\n",
" %store editor_inputname\n",
"\n",
" clear_output()\n",
" display(md(\"### Current Selection:\"))\n",
" \n",
" if 'invalid' in the_editor:\n",
" display(f\"The editor {editor_inputname} was not found, try a different editor\")\n",
" else:\n",
" # display the data that will be passed to the next notebook\n",
" display(the_editor.to_frame('values'))\n",
" display(md(f\"#### Evolution of the Conflict Score of *{the_editor['name']}*\"))\n",
"\n",
" editor_df = calculator.elegible_actions[\n",
" calculator.elegible_actions['editor'] == str(editor_inputname)].copy()\n",
"\n",
"\n",
" display_conflict_score(editor_df)\n",
"\n",
"\n",
"def on_selection_change(change):\n",
"\n",
" try:\n",
" select_editor(qg_obj.get_selected_df().iloc[0].name)\n",
" except:\n",
" print('Problem parsing the name. Execute the cell again and try a different editor.')\n",
"\n",
"import qgrid\n",
"qgrid.set_grid_option('maxVisibleRows', 5)\n",
"qg_obj = qgrid.show_grid(editors_conflicts)\n",
"qg_obj.observe(on_selection_change, names=['_selected_rows'])\n",
" \n",
"from ipywidgets import Output\n",
"from IPython.display import display, clear_output, Markdown as md\n",
"display(md(\"### Select one editor (row) to continue the demo:\"))\n",
"display(md('**Recomendation:** select an editor with *many conflicts* and *mid-high conflict score*'))\n",
"display(qg_obj)\n",
"out = Output()\n",
"display(out)\n",
"display(md(graph_description))\n",
"select_editor(editor_inputname)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display, Markdown as md\n",
"display(md(\"---\"))\n",
"display(md(f\"# B. Basic editor information\"))\n",
"display(md(f\"Provided by Wikipedia\"))\n",
"display(md(f\"***Editor: {the_editor['name']}***\"))\n",
"display(md(f\"\"\"The following is information about the editor directly available in Wikipedia.\"\"\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"the_editor.to_frame('value')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display, Markdown as md\n",
"display(md(\"---\"))\n",
"display(md(f\"# C. Modified pages of an editor\"))\n",
"display(md(f\"Provided through the Xtools API\"))\n",
"display(md(f\"***Editor: {the_editor['name']}***\"))\n",
"display(md(f\"\"\"The following is some metadata about the creation and deletion\n",
"of pages in Wikipedia by the editor.\"\"\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from external.xtools import XtoolsAPI, XtoolsDV\n",
"from IPython.display import display, clear_output\n",
"\n",
"xtools_api = XtoolsAPI(project = 'en.wikipedia.org')\n",
"xtools_dv = XtoolsDV(xtools_api)\n",
"\n",
"try:\n",
" editor_info = xtools_dv.get_modified_pages_counts_per_editor(the_editor['name'])\n",
" display(editor_info.to_frame('value'))\n",
"except:\n",
" clear_output()\n",
" display(md(f'**There are no modified pages by this editor.**'))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display, Markdown as md\n",
"display(md(\"---\"))\n",
"display(md(f\"# D. Select one page created by an editor\"))\n",
"display(md(f\"Provided through the Xtools API\"))\n",
"display(md(f\"***Editor: {the_editor['name']}***\"))\n",
"display(md(f\"\"\"The following table shows a list of created paged by the editor with some general \n",
"information about the page:\n",
"- *page_title*: title of the page\n",
"- *page_len*: an estimated amount of words in the page\n",
"- *rev_id*: the id of the last revision\n",
"- *rev_len*: the number of revisions made on that page\n",
"- *rev_timestamp*: the timestamp of the last revision (last modification)\"\"\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from external.wikipedia import WikipediaDV, WikipediaAPI\n",
"from IPython.display import HTML, display, clear_output\n",
"from utils.notebooks import get_notebook_by_number\n",
"from external.xtools import XtoolsAPI, XtoolsDV\n",
"\n",
"xtools_api = XtoolsAPI(project = 'en.wikipedia.org')\n",
"xtools_dv = XtoolsDV(xtools_api)\n",
"\n",
"try:\n",
" created_pages = xtools_dv.get_created_pages_per_editor(the_editor['name'])\n",
"except:\n",
" clear_output()\n",
" display(md(f'**There are no created pages by this editor.**'))\n",
" display(HTML(f'Go to next workbook'))\n",
" \n",
"\n",
"from visualization.conflicts_listener import ConflictsListener\n",
"def select_page(page):\n",
" global the_page\n",
" page_inputname=page\n",
" \n",
" wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))\n",
" try:\n",
" the_page = wikipedia_dv.get_page(int(page_inputname))\n",
" except:\n",
" the_page = wikipedia_dv.get_page(page_inputname)\n",
"\n",
" with out:\n",
" %store the_page\n",
" clear_output()\n",
" display(md(\"### Current Selection:\"))\n",
" \n",
" if 'invalid' in the_page:\n",
" display(f\"The page {page_inputname} was not found, try a different page\")\n",
" else:\n",
" # display the data that will be passed to the next notebook\n",
" display(the_page.to_frame('values'))\n",
" display(HTML(f'Go to next workbook'))\n",
" \n",
"\n",
"def on_selection_change(change):\n",
" try:\n",
" select_page(qg_obj.get_selected_df().iloc[0].page_title)\n",
" except:\n",
" print('Problem parsing the name. Execute the cell again and try a different page.')\n",
" display(HTML(f'Go to next workbook'))\n",
"\n",
"if 'created_pages' in locals():\n",
" import qgrid\n",
" qg_obj = qgrid.show_grid(created_pages[['page_title', 'page_len', 'rev_id', 'rev_len', 'rev_timestamp']])\n",
" qg_obj.observe(on_selection_change, names=['_selected_rows'])\n",
"\n",
"if 'qg_obj' in locals():\n",
" from ipywidgets import Output\n",
" from IPython.display import display, clear_output, Markdown as md\n",
" display(md(\"### Select one page row for the next notebook:\"))\n",
" display(qg_obj)\n",
" out = Output()\n",
" display(out)\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}