{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2\n",
"# Restore `the_page` if it was previously persisted with the store magic\n",
"%store -r the_page\n",
"\n",
"# Fall back to the pickled default page when nothing was restored\n",
"if 'the_page' not in locals():\n",
"    import pickle\n",
"    print(\"Loading default data...\")\n",
"    # Open the file in a with-block so the handle is closed after loading.\n",
"    # NOTE(security): pickle.load can execute arbitrary code; only load trusted files.\n",
"    with open(\"data/the_page.p\", 'rb') as page_file:\n",
"        the_page = pickle.load(page_file)\n",
"\n",
"# Section header, followed by the title of the analysed page\n",
"from IPython.display import display, Markdown as md\n",
"display(md(\"---\"))\n",
"display(md(f\"# A. Insertions, Deletions, Reinsertions (Actions)\"))\n",
"display(md(f\" Provided by the [WikiWho API](https://www.wikiwho.net/en/api/v1.0.0-beta/)\"))\n",
"display(md(f\"Please give the background processes time to load (see cog wheel symbol right of 'edit app') before interacting with the controls too often!\"))\n",
"display(md(f\"***Page: {the_page['title']}***\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# TODO: a description of the action types is missing; add it directly after the header (see the TokTrack paper for explanations)\n",
"# TODO: please show editor names instead of \"editor id\"; for IPs just put \"Unregistered editors\"\n",
"# FIXME: fails for larger articles like 'Evolution' on GESIS notebooks: the kernel dies when ranges are selected. Solution?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from wikiwho_wrapper import WikiWho\n",
"import pandas as pd\n",
"import qgrid\n",
"# cap the number of visible rows at 5 so the larger DataFrames we render don't take up too much space\n",
"qgrid.set_grid_option('maxVisibleRows', 5)\n",
"\n",
"# fetch the aggregated actions of the page through the WikiWho wrapper\n",
"# (subscript access to the page, as in the other cells of this notebook)\n",
"wikiwho = WikiWho(lng='en')\n",
"agg_actions = wikiwho.dv.actions(the_page['page_id'])\n",
"\n",
"# define total columns\n",
"total_columns = ['total', 'total_surv_48h', 'total_persistent', 'total_stopword_count']\n",
"\n",
"# add columns with the total actions: the element-wise sum of the adds, dels and\n",
"# reins column blocks. The three label slices are summed positionally, so each\n",
"# block must contain exactly one column per entry of total_columns, in that order.\n",
"agg_actions = agg_actions.join(pd.DataFrame(\n",
"    agg_actions.loc[:, 'adds':'adds_stopword_count'].values +\n",
"    agg_actions.loc[:, 'dels':'dels_stopword_count'].values +\n",
"    agg_actions.loc[:, 'reins':'reins_stopword_count'].values,\n",
"    index=agg_actions.index,\n",
"    columns=total_columns\n",
"))\n",
"\n",
"# show the totals per month and editor in an interactive grid\n",
"qgrid.show_grid(agg_actions[['year_month', 'editor_id'] + total_columns])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Convert to datetime\n",
"agg_actions['year_month'] = pd.to_datetime(agg_actions['year_month'])\n",
"\n",
"# The listener receives the frame as it is, with one row per editor.\n",
"# NOTE(review): any aggregation over editors presumably happens inside\n",
"# ActionsListener.listen - confirm against visualization/actions_listener.\n",
"\n",
"# Listener\n",
"from visualization.actions_listener import ActionsListener\n",
"listener = ActionsListener(agg_actions)\n",
"\n",
"# Selectable series: the total columns first, then every adds/dels/reins column\n",
"action_types = (\n",
"    agg_actions.loc[:, 'total':'total_stopword_count'].columns.tolist()\n",
"    + agg_actions.loc[:, 'adds':'reins_stopword_count'].columns.tolist()\n",
")\n",
"\n",
"# Visualization\n",
"from utils.notebooks import get_date_slider_from_datetime\n",
"from ipywidgets import interact, fixed\n",
"from ipywidgets.widgets import Dropdown\n",
"\n",
"# One dropdown per plotted line; only the black line cannot be switched off ('None')\n",
"interact(listener.listen,\n",
"         _range = get_date_slider_from_datetime(agg_actions['year_month']),\n",
"         editor=fixed('All'),\n",
"         granularity=Dropdown(options=['Yearly', 'Monthly'], value='Yearly'),\n",
"         black=Dropdown(options=action_types, value='total'), \n",
"         red=Dropdown(options= ['None'] + action_types, value='total_surv_48h'),\n",
"         green=Dropdown(options= ['None'] + action_types, value='None'), \n",
"         blue=Dropdown(options= ['None'] + action_types, value='None'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display, Markdown as md\n",
"\n",
"# Section header: rule, section titles and the title of the analysed page\n",
"header_lines = [\n",
"    \"---\",\n",
"    \"# B. Measuring conflict\",\n",
"    '## B.1 Token Conflict Score',\n",
"    f\"***Page: {the_page['title']}***\",\n",
"]\n",
"for header_line in header_lines:\n",
"    display(md(header_line))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from wikiwho_wrapper import WikiWho\n",
"from IPython.display import clear_output, display, Markdown as md\n",
"\n",
"# WikiWho client configured with language 'en'\n",
"wikiwho = WikiWho(lng='en')\n",
"\n",
"# Fetch the content of the page, announcing the step while it runs\n",
"display(md(\"Downloading all_content from the WikiWhoApi...\"))\n",
"all_content = wikiwho.dv.all_content(the_page['page_id'])\n",
"\n",
"# Fetch the revisions of the page\n",
"display(md(\"Downloading revisions from the WikiWhoApi...\"))\n",
"revisions = wikiwho.dv.rev_ids_of_article(the_page['page_id'])\n",
"\n",
"# Remove the progress messages once both downloads have finished\n",
"clear_output()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from metrics.conflict import ConflictManager\n",
"from wikiwho_wrapper import WikiWho\n",
"from IPython.display import clear_output, display, Markdown as md\n",
"from IPython.display import HTML\n",
"from utils.notebooks import get_next_notebook, get_previous_notebook\n",
"\n",
"# call the calculator, then clear the cell output\n",
"calculator = ConflictManager(all_content, revisions)\n",
"calculator.calculate()\n",
"clear_output()\n",
"\n",
"# work on a copy so the calculator's own conflicts table stays untouched\n",
"conflicts = calculator.conflicts.copy()\n",
"\n",
"if len(conflicts) > 0:\n",
"    # the seconds column is only needed for the grid, so it is derived here;\n",
"    # this also keeps the .dt accessor away from an empty table\n",
"    conflicts['time_diff_secs'] = conflicts['time_diff'].dt.total_seconds()\n",
"\n",
"    # display the tokens, the difference in seconds and its corresponding\n",
"    # conflict score, highest conflict first\n",
"    display(qgrid.show_grid(conflicts[[\n",
"        'action', 'token', 'token_id', 'rev_id', \n",
"        'editor', 'time_diff_secs', 'conflict']].sort_values('conflict', ascending=False)))\n",
"else:\n",
"    display(md(f'**There are no conflicting tokens in this page.**'))\n",
"    # NOTE(review): get_previous_notebook is imported but never used and this\n",
"    # message contains no hyperlink - confirm whether a link was intended.\n",
"    display(HTML(f'Go back to the previous workbook'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": [
"# TODO: explain the columns of the table"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display, Markdown as md\n",
"\n",
"# Subsection header: rule, title and the title of the analysed page\n",
"display(\n",
"    md(\"---\"),\n",
"    md('## B.2 Conflicting tokens per page'),\n",
"    md(f\"***Page: {the_page['title']}***\"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": [
"# TODO: explain 'All actions', 'Eligible Actions' and 'Only Conflicts' before the selection boxes, and how the score is calculated (at least the basics)\n",
"# TODO: explain what the colors in the tag cloud mean (why are insertions red and deletions blue, and not the other way around?)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from visualization.wordcloud_listener import WCListener\n",
"from utils.notebooks import get_date_slider_from_datetime\n",
"from ipywidgets import interact, fixed\n",
"from ipywidgets.widgets import Dropdown\n",
"\n",
"# Tables the word cloud can draw from, keyed by the label shown in the dropdown\n",
"wc_sources = {\n",
"    'All actions': calculator.all_actions,\n",
"    'Eligible Actions': calculator.elegible_actions,\n",
"    'Only Conflicts': calculator.conflicts\n",
"}\n",
"listener = WCListener(sources=wc_sources)\n",
"\n",
"# Controls handed to the listener; the editor is pinned to 'All'\n",
"wc_controls = dict(\n",
"    _range=get_date_slider_from_datetime(calculator.all_actions['rev_time']),\n",
"    source=Dropdown(options=list(listener.sources.keys()), value='Only Conflicts'),\n",
"    action=Dropdown(options=['Both', 'Just Insertions', 'Just Deletions'], value='Both'),\n",
"    editor=fixed('All'),\n",
")\n",
"\n",
"interact(listener.listen, **wc_controls)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display, Markdown as md\n",
"\n",
"# Subsection header: rule, title and the title of the analysed page\n",
"for header_text in (\n",
"    \"---\",\n",
"    '## B.3 Conflict Score and related metrics',\n",
"    f\"***Page: {the_page['title']}***\",\n",
"):\n",
"    display(md(header_text))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Listener working on its own copy of the eligible actions\n",
"from visualization.conflicts_listener import ConflictsListener\n",
"elegible_actions = calculator.elegible_actions.copy()\n",
"listener = ConflictsListener(elegible_actions)\n",
"\n",
"# Metrics that can be selected for the plotted lines\n",
"metrics = ['Conflict Score', 'Conflict Ratio', 'Total Conflicts', \n",
"           'Total Elegible Actions', 'Total Actions', 'Total Time',\n",
"           'Time per Elegible Action']\n",
"\n",
"# Compute the page score once and reuse it for the headline and the check below\n",
"page_conflict_score = calculator.get_page_conflict_score()\n",
"display(md(f'**Page conflict score: {page_conflict_score}**'))\n",
"\n",
"# Visualization\n",
"from utils.notebooks import get_date_slider_from_datetime\n",
"from ipywidgets import interact\n",
"from ipywidgets.widgets import Dropdown\n",
"\n",
"# The interactive plot is only shown when the page score is non-zero\n",
"if page_conflict_score != 0:\n",
"    interact(listener.listen,\n",
"             _range = get_date_slider_from_datetime(elegible_actions['rev_time']),\n",
"             granularity=Dropdown(options=['Yearly', 'Monthly', 'Daily'], value='Monthly'),\n",
"             black=Dropdown(options=metrics, value='Conflict Score'),\n",
"             red=Dropdown(options= ['None'] + metrics, value='None'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"inputHidden": false,
"outputHidden": false
},
"outputs": [],
"source": [
"# TODO: explain the difference between conflict ratio, scores, total conflicts, ..., time per eligible action"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from IPython.display import display, Markdown as md\n",
"\n",
"# Subsection header: rule, title and the title of the analysed page\n",
"display(\n",
"    md(\"---\"),\n",
"    md('## B.4 Conflict Score per Editor'),\n",
"    md(f\"***Page: {the_page['title']}***\"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Per-editor conflict scores as returned by the calculator, wrapped in a grid widget\n",
"editors_conflicts = calculator.get_conflict_score_per_editor()\n",
"qg_obj = qgrid.show_grid(editors_conflicts)\n",
"\n",
"# Show a notice instead of the grid when there are no rows\n",
"if len(editors_conflicts) == 0:\n",
"    display(md(f'**There is no Conflict Score**'))\n",
"else:\n",
"    display(qg_obj)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# clear_output and display are imported here as well, so this cell no longer\n",
"# depends on imports made by earlier cells\n",
"from IPython.display import HTML, clear_output, display\n",
"from utils.notebooks import get_next_notebook, get_previous_notebook\n",
"\n",
"# Persist both results with the store magic, then clear the cell output\n",
"%store agg_actions\n",
"%store calculator\n",
"clear_output()\n",
"\n",
"# NOTE(review): get_next_notebook / get_previous_notebook are imported but never\n",
"# used and the messages below contain no hyperlink - confirm whether links to the\n",
"# neighbouring notebooks were intended.\n",
"if len(editors_conflicts) > 0:\n",
"    display(HTML(f'Go to next workbook'))\n",
"else:\n",
"    display(HTML(f'Go back to the previous workbook'))"
]
}
],
"metadata": {
"kernel_info": {
"name": "python3"
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"nteract": {
"version": "0.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}