{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Compare two versions of an archived web page\n", "\n", "[View in GitHub](https://github.com/GLAM-Workbench/web-archives/blob/master/show_diffs.ipynb) · [View in GLAM Workbench](https://glam-workbench.net/web-archives/#compare-two-versions-of-an-archived-web-page)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# This notebook is designed to run in Voila as an app (with the code hidden).\n", "# To launch this notebook in Voila, just select 'View > Open with Voila in New Browser Tab'\n", "# Your browser might ask for permission to open the new tab as a popup." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook demonstrates a number of different ways of comparing versions of archived web pages. Just choose a repository, enter a url, and select two dates to see comparisons based on:\n", "\n", "
{date}
{display_url}
Share this: {share_url}
'))\n",
    "\n",
    "\n",
    "def clear(e):\n",
    "    \"\"\"\n",
    "    Reset the app before a new comparison is run.\n",
    "\n",
    "    Empties the global `page_data` list and clears every results output area.\n",
    "    The options form in `options_out` is left as it is.\n",
    "\n",
    "    Parameters:\n",
    "        e: not used by this function (start() passes a placeholder string)\n",
    "    \"\"\"\n",
    "    global page_data\n",
    "    page_data = []\n",
    "    md_out.clear_output()\n",
    "    stats_out.clear_output()\n",
    "    links_out.clear_output()\n",
    "    sim_out.clear_output()\n",
    "    diff_out.clear_output()\n",
    "    ss_out.clear_output()\n",
    "    share_out.clear_output()\n",
    "\n",
    "\n",
    "def start(e):\n",
    "    \"\"\"\n",
    "    Load the two captures and display each of the comparisons in turn.\n",
    "\n",
    "    Uses the global `url1` and `url2` when both are set, otherwise calls\n",
    "    get_mementos() to obtain the pair of urls to compare.\n",
    "\n",
    "    Parameters:\n",
    "        e: not used; this function is registered as the start button's click handler\n",
    "    \"\"\"\n",
    "    clear(\"e\")\n",
    "    if url1 and url2:\n",
    "        urls = [url1, url2]\n",
    "    else:\n",
    "        urls = get_mementos()\n",
    "    load_data(urls)\n",
    "    display_metadata(page_data)\n",
    "    display_summaries(page_data)\n",
    "    display_links(page_data)\n",
    "    display_similarities(page_data)\n",
    "    display_diff(\"e\")\n",
    "    display_screenshots(urls)\n",
    "    share_this(urls)\n",
    "\n",
    "\n",
    "def display_mementos(url1, url2, start_button):\n",
    "    \"\"\"\n",
    "    Show the two supplied memento urls and the start button in `options_out`.\n",
    "\n",
    "    NOTE: start() reads the global `url1` / `url2` rather than these text boxes,\n",
    "    so as far as the code here shows, edits made in the boxes are not picked up.\n",
    "\n",
    "    Parameters:\n",
    "        url1: url of the first memento\n",
    "        url2: url of the second memento\n",
    "        start_button: button widget displayed beneath the two text boxes\n",
    "    \"\"\"\n",
    "    memento1 = widgets.Text(value=url1, layout=widgets.Layout(width=\"400px\"))\n",
    "    memento2 = widgets.Text(value=url2, layout=widgets.Layout(width=\"400px\"))\n",
    "    with options_out:\n",
    "        display(\n",
    "            widgets.HBox(\n",
    "                [\n",
    "                    widgets.VBox(\n",
    "                        [\n",
    "                            widgets.Label(\"First memento:\"),\n",
    "                            widgets.Label(\"Second memento:\"),\n",
    "                        ]\n",
    "                    ),\n",
    "                    widgets.VBox([memento1, memento2, start_button]),\n",
    "                ],\n",
    "                layout=widgets.Layout(padding=\"20px\"),\n",
    "            )\n",
    "        )\n",
    "\n",
    "\n",
    "# Output areas for the options form and for each type of result.\n",
    "# They are displayed, in this order, at the end of the cell.\n",
    "options_out = widgets.Output()\n",
    "md_out = widgets.Output()\n",
    "stats_out = widgets.Output()\n",
    "links_out = widgets.Output()\n",
    "sim_out = widgets.Output()\n",
    "diff_out = widgets.Output()\n",
    "ss_out = widgets.Output()\n",
    "share_out = widgets.Output()\n",
    "\n",
    "start_button = widgets.Button(description=\"Start\", button_style=\"primary\")\n",
    "start_button.on_click(start)\n",
    "\n",
    "# Memento urls can be supplied as url1 and url2 parameters in the query string.\n",
    "query_string = os.environ.get(\"QUERY_STRING\", \"\")\n",
    "parameters = parse_qs(query_string)\n",
    "url1 = parameters.get(\"url1\", [\"\"])[0]\n",
    "url2 = parameters.get(\"url2\", [\"\"])[0]\n",
    "\n",
    "# Only skip the selection form when BOTH mementos have been supplied.\n",
    "# If either is missing start() falls back to get_mementos(), which\n",
    "# presumably reads the form widgets created in the else branch below.\n",
    "if url1 and url2:\n",
    "    display_mementos(url1, url2, start_button)\n",
    "else:\n",
    "    repository = widgets.Dropdown(\n",
    "        options=[\n",
    "            (\"---\", \"\"),\n",
    "            (\"UK Web Archive\", \"bl\"),\n",
    "            (\"UK Government Web Archive\", \"ukgwa\"),\n",
    "            (\"National Library of Australia\", \"nla\"),\n",
    "            (\"National Library of New Zealand\", \"nlnz\"),\n",
    "            (\"Internet Archive\", \"ia\"),\n",
    "        ],\n",
    "        description=\"Archive:\",\n",
    "        disabled=False,\n",
    "    )\n",
    "\n",
    "    target_url = widgets.Text(description=\"Target URL:\")\n",
    "\n",
    "    first_date = widgets.DatePicker(description=\"Date 1: \", disabled=False)\n",
    "\n",
    "    second_date = widgets.DatePicker(description=\"Date 2: \", disabled=False)\n",
    "    with options_out:\n",
    "        display(\n",
    "            widgets.HBox(\n",
    "                [\n",
    "                    widgets.VBox([repository, first_date]),\n",
    "                    widgets.VBox([target_url, second_date]),\n",
    "                ],\n",
    "                layout=widgets.Layout(padding=\"20px\"),\n",
    "            ),\n",
    "            widgets.HBox([start_button]),\n",
    "        )\n",
    "display(options_out, md_out, stats_out, links_out, sim_out, diff_out, ss_out, share_out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "%load_ext dotenv\n",
    "%dotenv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Insert some values for automated testing\n",
    "\n",
    "if os.getenv(\"GW_STATUS\") == \"dev\":\n",
    "    options_out.clear_output()\n",
    "    url1 = \"https://web.archive.org.au/awa/19981206012233mp_/http://www.discontents.com.au:80/\"\n",
    "    url2 = (\n",
    "        \"https://web.archive.org.au/awa/20100209041537mp_/http://discontents.com.au:80/\"\n",
    "    )\n",
    "    display_mementos(url1, url2, start_button)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# If values have been provided via url or above, then start automatically.\n",
    "# Note that Voila widgets don't load immediately, hence the polling to\n",
    "# make sure the start button exists.\n",
    "\n",
    "if url1 and url2:\n",
    "    script = \"\"\"\n",
    "    \"\"\"\n",
    "    display(HTML(script))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "----\n",
    "Created by [Tim 
Sherratt](https://timsherratt.org) for the [GLAM Workbench](https://glam-workbench.github.io). Support me by becoming a [GitHub sponsor](https://github.com/sponsors/wragge)!\n", "\n", "Work on this notebook was supported by the [IIPC Discretionary Funding Programme 2019-2020](http://netpreserve.org/projects/).\n", "\n", "The Web Archives section of the GLAM Workbench is sponsored by the [British Library](https://www.bl.uk/)." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 }