{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Visualise a search in Papers Past"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from urllib.parse import quote_plus\n",
"\n",
"import altair as alt\n",
"import pandas as pd\n",
"import requests\n",
"from IPython.display import display, HTML"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Your API key is: [YOUR API KEY]\n"
]
}
],
"source": [
"# Replace the placeholder below with your own API key before running the later cells\n",
"api_key = '[YOUR API KEY]'\n",
"\n",
"# Echo the key back so it is easy to confirm that the value was set\n",
"key_message = 'Your API key is: {}'\n",
"print(key_message.format(api_key))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Base url for queries.\n",
"# HTTPS is used because the API key travels in the query string and\n",
"# should not be sent over the network in clear text.\n",
"api_search_url = 'https://api.digitalnz.org/v3/records.json'\n",
"\n",
"# Set up the query params (later cells add to and overwrite these).\n",
"# Start with an empty text query to look at everything.\n",
"params = {\n",
"    'api_key': api_key,\n",
"    'text': ''\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Limit the search to the Papers Past collection and look for either spelling\n",
"params['and[display_collection][]'] = 'Papers Past'\n",
"params['text'] = 'possum OR opossum'\n",
"\n",
"# Ask for facet counts by year and by collection\n",
"# (facets_per_page presumably caps the number of values returned per facet -- confirm in the API docs)\n",
"params['facets'] = 'year,collection'\n",
"params['facets_per_page'] = 100\n",
"\n",
"# The timeout stops this cell from hanging indefinitely if the API does not respond\n",
"response = requests.get(api_search_url, params=params, timeout=60)\n",
"\n",
"# Stop here with a clear HTTP error if the request was rejected, instead of\n",
"# failing later with a KeyError when data['search'] is read\n",
"response.raise_for_status()\n",
"data = response.json()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
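"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>title</th>\n",
"      <th>count</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>Papers Past</td>\n",
"      <td>22649</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>Evening Post</td>\n",
"      <td>5379</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>Otago Daily Times</td>\n",
"      <td>3256</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>West Coast Times</td>\n",
"      <td>1227</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>Star</td>\n",
"      <td>1207</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"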
],
"text/plain": [
" title count\n",
"0 Papers Past 22649\n",
"1 Evening Post 5379\n",
"2 Otago Daily Times 3256\n",
"3 West Coast Times 1227\n",
"4 Star 1207"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pull the per-collection facet counts out of the API response\n",
"titles = data['search']['facets']['collection']\n",
"\n",
"# Facet labels become the 'title' column and their values the 'count' column\n",
"titles_df = pd.Series(titles).rename_axis('title').reset_index(name='count')\n",
"titles_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
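"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>year</th>\n",
"      <th>count</th>\n",
"      <th>url</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>1874</td>\n",
"      <td>807</td>\n",
"      <td>https://paperspast.natlib.govt.nz/newspapers?q...</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>1893</td>\n",
"      <td>666</td>\n",
"      <td>https://paperspast.natlib.govt.nz/newspapers?q...</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>1898</td>\n",
"      <td>641</td>\n",
"      <td>https://paperspast.natlib.govt.nz/newspapers?q...</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>1872</td>\n",
"      <td>622</td>\n",
"      <td>https://paperspast.natlib.govt.nz/newspapers?q...</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>1873</td>\n",
"      <td>622</td>\n",
"      <td>https://paperspast.natlib.govt.nz/newspapers?q...</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"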
],
"text/plain": [
" year count url\n",
"0 1874 807 https://paperspast.natlib.govt.nz/newspapers?q...\n",
"1 1893 666 https://paperspast.natlib.govt.nz/newspapers?q...\n",
"2 1898 641 https://paperspast.natlib.govt.nz/newspapers?q...\n",
"3 1872 622 https://paperspast.natlib.govt.nz/newspapers?q...\n",
"4 1873 622 https://paperspast.natlib.govt.nz/newspapers?q..."
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pull the per-year facet counts out of the API response and tabulate them\n",
"years = data['search']['facets']['year']\n",
"years_df = pd.Series(years).to_frame().reset_index()\n",
"years_df.columns = ['year', 'count']\n",
"\n",
"# Give every row a Papers Past search link limited to that row's own year.\n",
"# (Formatting the template once with years_df['year'][0] would stamp the\n",
"# first year's link onto every row.)\n",
"url_template = 'https://paperspast.natlib.govt.nz/newspapers?query={0}&start_date=01-01-{1}&end_date=31-12-{1}'\n",
"\n",
"# Escape spaces and reserved characters so the query text survives inside a URL\n",
"encoded_query = quote_plus(params['text'])\n",
"years_df['url'] = [url_template.format(encoded_query, year) for year in years_df['year']]\n",
"years_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
""
],
"text/plain": [
"alt.HConcatChart(...)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Line chart of the number of matching articles per year.\n",
"# The href encoding turns each point into a link to the row's 'url' value.\n",
"# Chart size is set once, in .properties(); the former Chart(..., width=600)\n",
"# argument was overridden by width=500 there and has been dropped.\n",
"c1 = alt.Chart(years_df).mark_line(point=True).encode(\n",
"    x = 'year(year):T',\n",
"    y = 'count:Q',\n",
"    tooltip = [alt.Tooltip('year(year):T', title='year'), alt.Tooltip('count', format=',')],\n",
"    href='url:N'\n",
").properties(\n",
"    height=300,\n",
"    width=500\n",
")\n",
"\n",
"# Bar chart of rows 1-10 of titles_df. Row 0 is skipped: in the saved preview\n",
"# it is the 'Papers Past' total rather than an individual newspaper title.\n",
"c2 = alt.Chart(titles_df[1:11]).mark_bar().encode(\n",
"    x = 'count:Q',\n",
"    y = 'title:O',\n",
"    tooltip = alt.Tooltip('count', format=',')\n",
").properties(\n",
"    height=300,\n",
"    width=200\n",
")\n",
"\n",
"# Show the two charts side by side\n",
"c1 | c2"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"----\n",
"\n",
"Created by [Tim Sherratt](https://timsherratt.org/) for the [GLAM Workbench](https://glam-workbench.net/). Support this project by becoming a [GitHub sponsor](https://github.com/sponsors/wragge?o=esb)."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}