{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Warning! Code ahead!" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "-" } }, "source": [ "#### Some setting up...\n", "\n", "Eeek! We're going to run **live** code during this presentation. To run a code cell (like the one below), just hover over it and then click on the play icon that pops up in the left margin. Do this for every code cell you see!" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "-" } }, "outputs": [], "source": [ "import requests\n", "import pandas as pd\n", "import json\n", "import altair as alt\n", "from tqdm import tnrange, trange\n", "import folium\n", "from folium.plugins import HeatMapWithTime\n", "from IPython.display import display, HTML\n", "api_key = 'ju3rgk0jp354ikmh'\n", "alt.renderers.enable('notebook')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Tell me Trove, how many newspaper articles do you have about 'influenza'?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "params = {\n", " 'q': 'influenza',\n", " 'zone': 'newspaper',\n", " 'encoding': 'json',\n", " 'facet': 'state',\n", " 'n': '1',\n", " 'key': api_key\n", "}\n", "response = requests.get('http://api.trove.nla.gov.au/v2/result', params=params)\n", "data = response.json()\n", "total = int(data['response']['zone'][0]['records']['total'])\n", "display(HTML('

There are {:,} articles Tim.

'.format(total)))" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Can you show the number of results in each state?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "# Make a map\n", "facets = data['response']['zone'][0]['facets']['facet']['term']\n", "df = pd.DataFrame(facets)\n", "df['count'] = pd.to_numeric(df['count'], errors='coerce')\n", "df = df.replace('ACT', 'Australian Capital Territory')\n", "with open('data/aus_state.geojson', \"r\") as geo_file:\n", " geo_data = json.load(geo_file)\n", "c1 = alt.Chart(alt.Data(values=geo_data['features'])\n", " ).mark_geoshape(stroke='black', strokeWidth=0.2\n", " ).encode(color=alt.Color('count:Q', scale=alt.Scale(scheme='greenblue'), legend=alt.Legend(title='Total articles'))\n", " ).transform_lookup(lookup='properties.STATE_NAME', from_=alt.LookupData(df, 'display', ['count'])\n", " ).project(type='mercator'\n", " ).properties(width=600, height=400)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "# Display the map\n", "c1" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Can you break them down by category?" 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "# Get some data\n", "params = {\n", " 'q': 'influenza',\n", " 'zone': 'newspaper',\n", " 'encoding': 'json',\n", " 'facet': 'category',\n", " 'n': '1',\n", " 'key': api_key\n", "}\n", "response = requests.get('http://api.trove.nla.gov.au/v2/result', params=params)\n", "data = response.json()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "# Make a chart\n", "facets = data['response']['zone'][0]['facets']['facet']['term']\n", "df = pd.DataFrame(facets)\n", "df['count'] = pd.to_numeric(df['count'], errors='coerce')\n", "c2 = alt.Chart(df).mark_bar().encode(\n", " x=alt.X('count:Q', title='Number of articles'),\n", " y=alt.Y('display:N', title='Category'),\n", " tooltip=['count:Q']\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "# Display the chart\n", "c2" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Can you show change over time?" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "# Get some data\n", "start = 188\n", "end = 194\n", "years = []\n", "params = {\n", " 'q': 'influenza', # 'q': 'influenza',
 'facet': 'year',
 'zone': 'newspaper', 
 'l-category': 'Article',
 'key': api_key,
 'encoding': 'json',
 'n': 0
}
# Harvest the 'year' facet one decade at a time and pool the terms in `years`.
# `trange` is tqdm's range wrapper, so the loop shows a progress bar while the
# requests run. NOTE(review): the decade values (188..193) are presumably
# Trove's decade codes for the 1880s-1930s -- confirm against the API docs.
for decade in trange(start, end):
    # The shared `params` dict is updated in place, so after the loop it still
    # carries the last decade requested.
    params['l-decade'] = decade
    response = requests.get('http://api.trove.nla.gov.au/v2/result', params=params)
    # Fail fast with a clear HTTP error (bad key, rate limit, outage) instead of
    # letting an error page surface later as a JSON decode error or KeyError.
    response.raise_for_status()
    data = response.json()
    # Only one zone ('newspaper') is requested, hence index 0.
    years += data['response']['zone'][0]['facets']['facet']['term']
# Build a DataFrame from all the facet terms accumulated across decades.
df = pd.DataFrame(years)
df.head() 'q': 'influenza',
 'facet': 'title',
 'l-category': 'Article',
 'zone': 'newspaper', 
 'key': api_key,
 'encoding': 'json',
 'n': 0
}