{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Visualise a search in Papers Past" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import requests\n", "import pandas as pd\n", "import altair as alt\n", "from IPython.display import display, HTML" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Your API key is: [YOUR API KEY]\n" ] } ], "source": [ "api_key = '[YOUR API KEY]'\n", "print('Your API key is: {}'.format(api_key))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Base url for queries (https, because the API key travels in the query string)\n", "api_search_url = 'https://api.digitalnz.org/v3/records.json'\n", "\n", "# Set up the query params (we'll change these later)\n", "# Let's start with an empty text query to look at everything\n", "params = {\n", " 'api_key': api_key,\n", " 'text': ''\n", "}" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Restrict the search to Papers Past and request the year and collection facets\n", "params['and[display_collection][]'] = 'Papers Past'\n", "params['text'] = 'possum OR opossum'\n", "params['facets'] = 'year,collection'\n", "params['facets_per_page'] = 100\n", "response = requests.get(api_search_url, params=params)\n", "\n", "# Stop here with a clear HTTP error on a 4xx/5xx response, rather than\n", "# failing later with a KeyError when the facets are read from `data`\n", "response.raise_for_status()\n", "data = response.json()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlecount
0Papers Past22649
1Evening Post5379
2Otago Daily Times3256
3West Coast Times1227
4Star1207
\n", "
" ], "text/plain": [ " title count\n", "0 Papers Past 22649\n", "1 Evening Post 5379\n", "2 Otago Daily Times 3256\n", "3 West Coast Times 1227\n", "4 Star 1207" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Turn the collection facet (title -> count) into a two-column dataframe\n", "titles = data['search']['facets']['collection']\n", "titles_df = pd.Series(titles).rename_axis('title').reset_index(name='count')\n", "titles_df.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
yearcounturl
01874807https://paperspast.natlib.govt.nz/newspapers?q...
11893666https://paperspast.natlib.govt.nz/newspapers?q...
21898641https://paperspast.natlib.govt.nz/newspapers?q...
31872622https://paperspast.natlib.govt.nz/newspapers?q...
41873622https://paperspast.natlib.govt.nz/newspapers?q...
\n", "
" ], "text/plain": [ " year count url\n", "0 1874 807 https://paperspast.natlib.govt.nz/newspapers?q...\n", "1 1893 666 https://paperspast.natlib.govt.nz/newspapers?q...\n", "2 1898 641 https://paperspast.natlib.govt.nz/newspapers?q...\n", "3 1872 622 https://paperspast.natlib.govt.nz/newspapers?q...\n", "4 1873 622 https://paperspast.natlib.govt.nz/newspapers?q..." ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from urllib.parse import quote_plus\n", "\n", "# Turn the year facet (year -> count) into a dataframe\n", "years = data['search']['facets']['year']\n", "years_df = pd.Series(years).to_frame().reset_index()\n", "years_df.columns = ['year', 'count']\n", "\n", "# URL-encode the search text once so spaces and characters such as & or #\n", "# cannot break the query string of the generated links\n", "query = quote_plus(params['text'])\n", "search_url = 'https://paperspast.natlib.govt.nz/newspapers?query={0}&start_date=01-01-{1}&end_date=31-12-{1}'\n", "\n", "# Build the link row by row so each year points at its own date range;\n", "# formatting once with years_df['year'][0] gave every row the first year's link\n", "years_df['url'] = years_df['year'].apply(lambda year: search_url.format(query, year))\n", "years_df.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.HConcatChart(...)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Line chart of matches per year; each point links to the address in its 'url' column\n", "year_tooltips = [\n", "    alt.Tooltip('year(year):T', title='year'),\n", "    alt.Tooltip('count', format=','),\n", "]\n", "years_chart = (\n", "    alt.Chart(years_df)\n", "    .mark_line(point=True)\n", "    .encode(x='year(year):T', y='count:Q', tooltip=year_tooltips, href='url:N')\n", "    .properties(width=500, height=300)\n", ")\n", "\n", "# Bar chart of rows 1-10 of the titles table; row 0 is skipped\n", "# (in the saved output it is the overall 'Papers Past' count)\n", "titles_chart = (\n", "    alt.Chart(titles_df.iloc[1:11])\n", "    .mark_bar()\n", "    .encode(x='count:Q', y='title:O', tooltip=alt.Tooltip('count', format=','))\n", "    .properties(width=200, height=300)\n", ")\n", "\n", "# Display the two charts side by side\n", "years_chart | titles_chart" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "----\n", "\n", "Created by [Tim Sherratt](https://timsherratt.org/) for the [GLAM Workbench](https://glam-workbench.net/). Support this project by becoming a [GitHub sponsor](https://github.com/sponsors/wragge?o=esb)." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 4 }