{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "french-switzerland",
   "metadata": {},
   "source": [
    "# Papers Past newspapers in DigitalNZ\n",
    "\n",
    "A subset of the digitised newspapers in [Papers Past](https://paperspast.natlib.govt.nz/) can be searched through [DigitalNZ](https://digitalnz.org/). Using data from the [DigitalNZ API](https://digitalnz.org/developers/getting-started) we can look at what's available."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "funded-maximum",
   "metadata": {},
   "outputs": [],
   "source": [
    "# This cell just sets up some stuff that we'll need later\n",
    "\n",
    "import requests\n",
    "from requests.adapters import HTTPAdapter\n",
    "from requests.packages.urllib3.util.retry import Retry\n",
    "import pandas as pd\n",
    "from IPython.display import display, FileLink\n",
    "from pathlib import Path\n",
    "from urllib.parse import quote\n",
    "import altair as alt\n",
    "\n",
    "alt.data_transformers.enable('default')\n",
    "alt.data_transformers.disable_max_rows()\n",
    "\n",
    "# Shared session that retries gateway errors (502/503/504) up to 5 times with backoff\n",
    "s = requests.Session()\n",
    "retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])\n",
    "# Mount on both schemes so the retry policy applies whichever one API_URL uses\n",
    "s.mount('https://', HTTPAdapter(max_retries=retries))\n",
    "s.mount('http://', HTTPAdapter(max_retries=retries))\n",
    "\n",
    "\n",
    "# Make links in Altair open in a new tab\n",
    "def blank_href():\n",
    "    \"\"\"Return an Altair theme config that sets the embed loader's link target to '_blank'.\"\"\"\n",
    "    return {\n",
    "        'usermeta': {\n",
    "            'embedOptions': {\n",
    "                'loader': {'target': '_blank'}\n",
    "            }\n",
    "        }\n",
    "    }\n",
    "\n",
    "\n",
    "# register the custom theme under a chosen name\n",
    "alt.themes.register('blank_href', blank_href)\n",
    "\n",
    "# enable the newly registered theme\n",
    "alt.themes.enable('blank_href')\n",
    "\n",
    "# Use HTTPS so the API key isn't sent in clear text\n",
    "API_URL = 'https://api.digitalnz.org/v3/records.json'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "contrary-violation",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Paste your API key between the quotes\n",
    "# You might need to trim off any spaces at the beginning and end\n",
    "API_KEY = 'YOUR API KEY'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "stunning-imaging",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Base query shared by every request below.\n",
    "# Only the facets of each response are read, so no records are requested (per_page is 0).\n",
    "# NOTE: facets_per_page is presumably large enough to return every year and title - confirm if results look truncated.\n",
    "params = {\n",
    "    'and[primary_collection][]': 'Papers Past',\n",
    "    'facets': 'year,collection,placename',\n",
    "    'facets_per_page': 350,\n",
    "    'per_page': 0,\n",
    "    'api_key': API_KEY\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "elect-package",
   "metadata": {},
   "source": [
    "## Total number of articles per year\n",
    "\n",
    "Hover for details. Click to search for articles in DigitalNZ."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "announced-savage",
   "metadata": {},
   "outputs": [],
   "source": [
    "response = s.get(API_URL, params=params)\n",
    "# Fail here with a clear HTTP error (e.g. a rejected API key) rather than with a KeyError further down\n",
    "response.raise_for_status()\n",
    "data = response.json()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "union-privilege",
   "metadata": {},
   "outputs": [],
   "source": [
    "years = [{'year': int(k), 'total': v} for k, v in data['search']['facets']['year'].items()]\n",
    "titles = data['search']['facets']['collection']\n",
    "# 'Papers Past' is the primary collection we filtered on, not an individual newspaper, so drop it if present\n",
    "titles.pop('Papers Past', None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "iraqi-spelling",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill in any missing years (with a total of 0, matching the per-newspaper data below)\n",
    "df_years = pd.DataFrame(years).set_index('year')\n",
    "min_year = int(df_years.index.min())\n",
    "max_year = int(df_years.index.max())\n",
    "idx = list(range(min_year, max_year + 1))\n",
    "df_years = df_years.reindex(idx, fill_value=0).reset_index()\n",
    "\n",
    "# Add a url to search in DigitalNZ\n",
    "df_years['url'] = df_years['year'].apply(lambda x: f'https://digitalnz.org/records?i[primary_collection]=Papers%20Past&i[year]={x}#/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "satellite-treasurer",
   "metadata": {},
   "outputs": [],
   "source": [
    "alt.Chart(df_years).mark_bar().encode(\n",
    "    x='year:O',\n",
    "    y='total:Q',\n",
    "    href='url:N',\n",
    "    tooltip=['year:N', alt.Tooltip('total:Q', format=',')]\n",
    ").properties(width=800)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "hawaiian-processing",
   "metadata": {
    "tags": []
   },
   "source": [
    "## Number of articles in each newspaper per year\n",
    "\n",
    "Hover for details. Click to search for articles in DigitalNZ."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "narrative-gallery",
   "metadata": {},
   "outputs": [],
   "source": [
    "title_dfs = []\n",
    "idx = list(range(min_year, max_year + 1))\n",
    "\n",
    "# Loop through titles to download year facets\n",
    "for title in titles:\n",
    "    # Query with a per-title copy so the shared `params` isn't left filtered to the last title\n",
    "    title_params = {**params, 'and[collection][]': title}\n",
    "    title_response = s.get(API_URL, params=title_params)\n",
    "    title_response.raise_for_status()\n",
    "    title_facets = title_response.json()['search']['facets']['year']\n",
    "    # Fill in missing years; building from a Series also copes with a title that has no year facets\n",
    "    title_totals = pd.Series({int(k): v for k, v in title_facets.items()}, dtype='int64')\n",
    "    df_title = title_totals.reindex(idx, fill_value=0).rename_axis('year').reset_index(name='total')\n",
    "    # Add newspaper name\n",
    "    df_title['newspaper'] = title\n",
    "    # Add url to search in DigitalNZ; percent-encode the title so characters like '&' don't break the query string\n",
    "    collection = quote(title, safe='')\n",
    "    df_title['url'] = df_title['year'].apply(lambda year: f'https://digitalnz.org/records?i[primary_collection]=Papers%20Past&i[year]={year}&i[collection]={collection}#/')\n",
    "    title_dfs.append(df_title)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "banner-assignment",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the per-newspaper frames into one long table with a fresh row index\n",
    "df_all = pd.concat(title_dfs, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "absolute-improvement",
   "metadata": {},
   "outputs": [],
   "source": [
    "alt.Chart(df_all).mark_bar().encode(\n",
    "    x=alt.X('year:O'),\n",
    "    y=alt.Y('total:Q', title=None),\n",
    "    facet=alt.Facet('newspaper:N', columns=1, title=None),\n",
    "    href='url:N',\n",
    "    tooltip=['newspaper', 'year', alt.Tooltip('total', format=',')]\n",
    ").properties(width=800, height=50).resolve_scale(\n",
    "    y='independent'\n",
    ").configure_view(\n",
    "    strokeWidth=0\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ordinary-istanbul",
   "metadata": {},
   "source": [
    "----\n",
    "\n",
    "Created by [Tim Sherratt](https://timsherratt.org/) for the [GLAM Workbench](https://glam-workbench.net/). Support this project by becoming a [GitHub sponsor](https://github.com/sponsors/wragge?o=esb)."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}