{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%store -r the_page\n",
    "\n",
    "# Fall back to the default pickled page when %store did not restore one.\n",
    "if 'the_page' not in locals():\n",
    "    import pickle\n",
    "    print(\"Loading default data...\")\n",
    "    # NOTE: unpickling can execute arbitrary code -- only load files you trust.\n",
    "    # The context manager closes the file handle once the page is loaded.\n",
    "    with open(\"data/the_page.p\", 'rb') as default_page_file:\n",
    "        the_page = pickle.load(default_page_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's take a look at the evolution of the revision history of an English\n",
    "Wikipedia edition article, up to now. Example: the novel \"The Camp of the Saints\". We can get live data from various sources:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "\n",
    "# Section header for the search part of the notebook.\n",
    "display(md(\"---\"))\n",
    "display(md(\"# A. Basic Info from Wikipedia\"))\n",
    "display(md(\"***Search for a Wikipedia Page***\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ipywidgets import widgets, Output\n",
    "from IPython.display import Markdown as md, clear_output, display\n",
    "from external.wikipedia import WikipediaDV, WikipediaAPI\n",
    "wikipedia_dv = WikipediaDV(WikipediaAPI(domain='en.wikipedia.org'))\n",
    "\n",
    "# the method that listens to the click event\n",
    "def on_button_clicked(b):\n",
    "    \"\"\"Search Wikipedia for the title typed in `searchTerm` and show the result in `out`.\n",
    "\n",
    "    Args:\n",
    "        b: the widget passed by the click event (unused).\n",
    "\n",
    "    Side effects:\n",
    "        On success rebinds the global `the_page` and persists it with `%store`;\n",
    "        on failure shows a \"not found\" message instead.\n",
    "    \"\"\"\n",
    "    global the_page\n",
    "\n",
    "    # use the out widget so the output is overwritten when two or more\n",
    "    # searches are performed\n",
    "    with out:\n",
    "        try:\n",
    "            # query wikipedia\n",
    "            search_result = wikipedia_dv.search_page(searchTerm.value)\n",
    "            the_page = wikipedia_dv.get_page(search_result)\n",
    "            %store the_page\n",
    "            clear_output()\n",
    "            display(the_page.to_frame('value'))\n",
    "\n",
    "        # Exception (not a bare except) so Ctrl-C / kernel interrupts still propagate\n",
    "        except Exception:\n",
    "            clear_output()\n",
    "            display(md(f'The page title *\"{searchTerm.value}\"* was not found'))\n",
    "\n",
    "# by default display the last search\n",
    "try:\n",
    "    searchTerm = widgets.Text(the_page['title'], description='Page title:')\n",
    "except Exception:\n",
    "    # no usable previous page: start from the example title\n",
    "    searchTerm = widgets.Text(\"The Camp of the Saints\", description='Page title:')\n",
    "\n",
    "# create and display the button\n",
    "button = widgets.Button(description=\"Search\")\n",
    "example = md(\"e.g. *The Camp of the Saints*\")\n",
    "display(searchTerm,example,button)\n",
    "\n",
    "# the output widget is used to remove the output after the search field\n",
    "out = Output()\n",
    "display(out)\n",
    "\n",
    "# set the event\n",
    "button.on_click(on_button_clicked)\n",
    "\n",
    "# trigger the event with the default value\n",
    "on_button_clicked(button)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ipywidgets import widgets\n",
    "from IPython.display import display, Javascript\n",
    "\n",
    "def run_below(ev):\n",
    "    \"\"\"Ask the notebook front end to execute all cells below (click handler; `ev` is unused).\"\"\"\n",
    "    # NOTE(review): `IPython.notebook` is the classic Notebook JS API -- confirm the target front end provides it.\n",
    "    display(Javascript('IPython.notebook.execute_cells_below()'))\n",
    "\n",
    "button = widgets.Button(description=\"Refresh Notebook\", button_style='info', min_width=500)\n",
    "button.on_click(run_below)\n",
    "display(button)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "\n",
    "# Section header; shows the title of the currently selected page.\n",
    "display(md(\"---\"))\n",
    "display(md(\"# B. General Statistics \"))\n",
    "display(md(\"Provided through the Xtools API\"))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from external.xtools import XtoolsAPI, XtoolsDV\n",
    "\n",
    "# Query Xtools for the selected page and show the result as a one-column table.\n",
    "xtools_api = XtoolsAPI(project = 'en.wikipedia.org')\n",
    "xtools_dv = XtoolsDV(xtools_api)\n",
    "page_info = xtools_dv.get_page_info(the_page['title'])\n",
    "page_info.to_frame('value')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: unclear xtools fields -- delete them or describe them better:\n",
    "#   - author (= creator of the page)\n",
    "#   - pageviews_offset (delete)\n",
    "#   - watchers (= users that have added this page to their watchlist)\n",
    "#   - author_editcount (delete)\n",
    "#   - secs_since_last_edit (delete)\n",
    "#   - elapsed_time (unclear; delete?)\n",
    "#   - what is \"assessement\"? --> retain \"value\" if it means article quality and category"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.display import display, Markdown as md\n",
    "\n",
    "# Section header; shows the title of the currently selected page.\n",
    "display(md(\"---\"))\n",
    "display(md(\"# C. Page Views\"))\n",
    "display(md(\"Provided by Wikimedia page view API (only available since 2015)\"))\n",
    "display(md(f\"***Page: {the_page['title']}***\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: the graph of page views has 'actions' as the y-axis label, when it should be \"views\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Query request\n",
    "from external.wikimedia import WikiMediaDV, WikiMediaAPI\n",
    "wikimedia_api = WikiMediaAPI(project='en.wikipedia')\n",
    "wikimedia_dv = WikiMediaDV(wikimedia_api)\n",
    "views = wikimedia_dv.get_pageviews(the_page['title'], 'daily')\n",
    "\n",
    "# Visualization\n",
    "from visualization.views_listener import ViewsListener\n",
    "from ipywidgets import interact\n",
    "from ipywidgets.widgets import Dropdown\n",
    "\n",
    "listener = ViewsListener(views)\n",
    "interact(listener.listen,\n",
    "         begin=Dropdown(options=views.timestamp),\n",
    "         end=Dropdown(options=views.timestamp.sort_values(ascending=False)),\n",
    "         granularity=Dropdown(options=['Yearly', 'Monthly', 'Weekly', 'Daily'], value='Monthly'))\n",
    "\n",
    "# The df_plotted keeps a reference to the plotted data above\n",
    "# (presumably set by listener.listen when interact first calls it -- confirm).\n",
    "# 'Average views' uses the mean; it previously reported the minimum by mistake.\n",
    "listener.df_plotted['views'].agg({\n",
    "    'Total views': 'sum',\n",
    "    'Max views period': 'max',\n",
    "    'Min views period': 'min',\n",
    "    'Average views': 'mean',\n",
    "}).to_frame('Value')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "from utils.notebooks import get_next_notebook\n",
    "from IPython.display import HTML, display\n",
    "# NOTE(review): get_next_notebook is imported but not used below -- presumably the link\n",
    "# markup around the text was lost; confirm the intended target before restoring it.\n",
    "display(HTML('Go to next workbook'))\n"
   ]
  }
 ],
 "metadata": {
  "kernel_info": {
   "name": "python3"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "nteract": {
   "version": "0.12.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}