{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "# Explore harvested text files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[nltk_data] Downloading package stopwords to /home/tim/nltk_data...\n",
      "[nltk_data]   Package stopwords is already up-to-date!\n",
      "[nltk_data] Downloading package punkt to /home/tim/nltk_data...\n",
      "[nltk_data]   Package punkt is already up-to-date!\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import zipfile\n",
    "from pathlib import Path\n",
    "\n",
    "import altair as alt\n",
    "import nltk\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "\n",
    "nltk.download(\"stopwords\")\n",
    "nltk.download(\"punkt\")\n",
    "\n",
    "stopwords = nltk.corpus.stopwords.words(\"english\")\n",
    "stopwords += [\"tho\", \"tbe\"]\n",
    "\n",
    "# Are you using Jupyter Lab?\n",
    "# If so either don't run this cell or comment out the line below\n",
    "\n",
    "# alt.renderers.enable('notebook')\n",
    "\n",
    "# If you forget, run this cell, and then get strange warnings when you make a chart,\n",
    "# uncomment the following line and run this cell to reset the chart renderer\n",
    "\n",
    "# alt.renderers.enable('default')\n",
    "\n",
    "# alt.data_transformers.enable('json')\n",
    "# nltk.download('stopwords')\n",
    "# nltk.download('punkt')\n",
    "# stopwords = nltk.corpus.stopwords.words('english')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%capture\n",
    "# Load variables from the .env file if it exists\n",
    "# Use %%capture to suppress messages\n",
    "%load_ext dotenv\n",
    "%dotenv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": [
     "nbval-skip"
    ]
   },
   "outputs": [],
   "source": [
    "# Import a harvest zip file you've created previously\n",
    "# First upload the zip file to the data directory, then run this cell\n",
    "\n",
    "for zipped in sorted(Path(\"data\").glob(\"*.zip\")):\n",
    "    print(f\"Unzipping {zipped}...\")\n",
    "    with zipfile.ZipFile(zipped, \"r\") as zip_file:\n",
    "        zip_file.extractall(Path(f\"data/{zipped.stem}\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "def get_latest_harvest():\n",
    "    \"\"\"\n",
    "    Get the timestamp of the most recent harvest.\n",
    "    \"\"\"\n",
    "    harvests = sorted(\n",
    "        [d for d in Path(\"data\").iterdir() if d.is_dir() and not d.name.startswith(\".\")]\n",
    "    )\n",
    "    try:\n",
    "        harvest = harvests[-1]\n",
    "    except IndexError:\n",
    "        print(\"No harvests!\")\n",
    "        harvest = None\n",
    "    return harvest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "def get_docs(harvest):\n",
    "    docs_path = get_docs_path(harvest)\n",
    "    for p in docs_path:\n",
    "        yield p.read_text(encoding=\"utf-8\").strip()\n",
    "\n",
    "\n",
    "def get_docs_path(harvest):\n",
    "    path = Path(harvest, \"text\")\n",
    "    docs_path = [p for p in sorted(path.glob(\"*.txt\"))]\n",
    "    return docs_path\n",
    "\n",
    "\n",
    "def get_file_names(harvest):\n",
    "    return [p.stem for p in get_docs_path(harvest)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# In testing environment, open a test harvest\n",
    "if os.getenv(\"GW_STATUS\") == \"dev\":\n",
    "    harvest = Path(\"data\", \"1655952487\")\n",
    "# Otherwise open most recent harvest\n",
    "# Supply a harvest directory name to open a specific harvest\n",
    "else:\n",
    "    harvest = get_latest_harvest()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "vectorizer = CountVectorizer(\n",
    "    stop_words=stopwords, max_features=10000, ngram_range=(1, 1)\n",
    ")\n",
    "# preprocessor = lambda x: re.sub(r'(\\d[\\d\\.])+', 'NUM', x.lower())\n",
    "X_freq = np.asarray(vectorizer.fit_transform(get_docs(harvest)).todense())\n",
    "df_freq = pd.DataFrame(\n",
    "    X_freq, columns=vectorizer.get_feature_names_out(), index=get_file_names(harvest)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "st          68466\n",
       "street      62029\n",
       "good        41017\n",
       "rooms       39883\n",
       "new         32997\n",
       "apply       30852\n",
       "mr          30242\n",
       "co          28497\n",
       "wanted      25910\n",
       "10          25748\n",
       "room        25505\n",
       "house       25309\n",
       "sale        24637\n",
       "office      22547\n",
       "per         21527\n",
       "two         19313\n",
       "terms       18843\n",
       "one         18480\n",
       "land        18250\n",
       "brisbane    18066\n",
       "dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_freq.sum().nlargest(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>level_0</th>\n",
       "      <th>level_1</th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>00</td>\n",
       "      <td>18541005-13-4798540</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>00</td>\n",
       "      <td>18550403-13-4806194</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>00</td>\n",
       "      <td>18561031-13-7139235</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>00</td>\n",
       "      <td>18571126-13-7142543</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>00</td>\n",
       "      <td>18580710-13-7297359</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30659995</th>\n",
       "      <td>zu</td>\n",
       "      <td>19541112-969-204759481</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30659996</th>\n",
       "      <td>zu</td>\n",
       "      <td>19541116-12-50619201</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30659997</th>\n",
       "      <td>zu</td>\n",
       "      <td>19541119-470-135256155</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30659998</th>\n",
       "      <td>zu</td>\n",
       "      <td>19870909-11-122120946</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30659999</th>\n",
       "      <td>zu</td>\n",
       "      <td>19880228-11-101979292</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>30660000 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         level_0                 level_1  0\n",
       "0             00     18541005-13-4798540  2\n",
       "1             00     18550403-13-4806194  0\n",
       "2             00     18561031-13-7139235  0\n",
       "3             00     18571126-13-7142543  0\n",
       "4             00     18580710-13-7297359  3\n",
       "...          ...                     ... ..\n",
       "30659995      zu  19541112-969-204759481  0\n",
       "30659996      zu    19541116-12-50619201  0\n",
       "30659997      zu  19541119-470-135256155  0\n",
       "30659998      zu   19870909-11-122120946  0\n",
       "30659999      zu   19880228-11-101979292  0\n",
       "\n",
       "[30660000 rows x 3 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_freq.unstack().to_frame().reset_index().dropna(axis=0, subset=[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 6.34 s, sys: 98 µs, total: 6.34 s\n",
      "Wall time: 6.34 s\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>9</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>18541005-13-4798540</th>\n",
       "      <td>mr</td>\n",
       "      <td>street</td>\n",
       "      <td>bo</td>\n",
       "      <td>co</td>\n",
       "      <td>melbourne</td>\n",
       "      <td>per</td>\n",
       "      <td>hotel</td>\n",
       "      <td>day</td>\n",
       "      <td>near</td>\n",
       "      <td>mrs</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18550403-13-4806194</th>\n",
       "      <td>john</td>\n",
       "      <td>wm</td>\n",
       "      <td>james</td>\n",
       "      <td>mrs</td>\n",
       "      <td>geo</td>\n",
       "      <td>thos</td>\n",
       "      <td>thomas</td>\n",
       "      <td>henry</td>\n",
       "      <td>miss</td>\n",
       "      <td>jno</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18561031-13-7139235</th>\n",
       "      <td>street</td>\n",
       "      <td>nov</td>\n",
       "      <td>mr</td>\n",
       "      <td>sale</td>\n",
       "      <td>apply</td>\n",
       "      <td>land</td>\n",
       "      <td>co</td>\n",
       "      <td>near</td>\n",
       "      <td>let</td>\n",
       "      <td>east</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18571126-13-7142543</th>\n",
       "      <td>machine</td>\n",
       "      <td>made</td>\n",
       "      <td>large</td>\n",
       "      <td>messrs</td>\n",
       "      <td>one</td>\n",
       "      <td>year</td>\n",
       "      <td>two</td>\n",
       "      <td>iron</td>\n",
       "      <td>prizes</td>\n",
       "      <td>three</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18580710-13-7297359</th>\n",
       "      <td>july</td>\n",
       "      <td>12</td>\n",
       "      <td>street</td>\n",
       "      <td>sale</td>\n",
       "      <td>clock</td>\n",
       "      <td>sell</td>\n",
       "      <td>co</td>\n",
       "      <td>auction</td>\n",
       "      <td>terms</td>\n",
       "      <td>monday</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                           0       1       2       3          4     5       6  \\\n",
       "18541005-13-4798540       mr  street      bo      co  melbourne   per   hotel   \n",
       "18550403-13-4806194     john      wm   james     mrs        geo  thos  thomas   \n",
       "18561031-13-7139235   street     nov      mr    sale      apply  land      co   \n",
       "18571126-13-7142543  machine    made   large  messrs        one  year     two   \n",
       "18580710-13-7297359     july      12  street    sale      clock  sell      co   \n",
       "\n",
       "                           7       8       9  \n",
       "18541005-13-4798540      day    near     mrs  \n",
       "18550403-13-4806194    henry    miss     jno  \n",
       "18561031-13-7139235     near     let    east  \n",
       "18571126-13-7142543     iron  prizes   three  \n",
       "18580710-13-7297359  auction   terms  monday  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "# The number of words you want to show\n",
    "num_words = 10\n",
    "top_words = pd.DataFrame(\n",
    "    {\n",
    "        n: df_freq.T[col].nlargest(num_words).index.tolist()\n",
    "        for n, col in enumerate(df_freq.T)\n",
    "    }\n",
    ").T\n",
    "top_words.index = get_file_names(harvest)\n",
    "top_words.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>18541005-13-4798540</th>\n",
       "      <th>18550403-13-4806194</th>\n",
       "      <th>18561031-13-7139235</th>\n",
       "      <th>18571126-13-7142543</th>\n",
       "      <th>18580710-13-7297359</th>\n",
       "      <th>18590407-13-5679082</th>\n",
       "      <th>18590520-13-5681431</th>\n",
       "      <th>18590524-809-154839403</th>\n",
       "      <th>18590812-67-60405583</th>\n",
       "      <th>18640227-13-5744865</th>\n",
       "      <th>...</th>\n",
       "      <th>19530604-97-62492704</th>\n",
       "      <th>19530822-35-18381792</th>\n",
       "      <th>19531009-687-145667588</th>\n",
       "      <th>19531015-379-100665477</th>\n",
       "      <th>19540424-77-57316830</th>\n",
       "      <th>19541112-969-204759481</th>\n",
       "      <th>19541116-12-50619201</th>\n",
       "      <th>19541119-470-135256155</th>\n",
       "      <th>19870909-11-122120946</th>\n",
       "      <th>19880228-11-101979292</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>00</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>000</th>\n",
       "      <td>3</td>\n",
       "      <td>11</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>25</td>\n",
       "      <td>12</td>\n",
       "      <td>35</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>18</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>262</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>001</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>009</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>01</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>yy</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zealand</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zeehan</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zinc</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>zu</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10000 rows × 3066 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         18541005-13-4798540  18550403-13-4806194  18561031-13-7139235  \\\n",
       "00                         2                    0                    0   \n",
       "000                        3                   11                    5   \n",
       "001                        0                    0                    0   \n",
       "009                        0                    0                    0   \n",
       "01                         1                    1                    0   \n",
       "...                      ...                  ...                  ...   \n",
       "yy                         0                    0                    0   \n",
       "zealand                    0                    0                    0   \n",
       "zeehan                     0                    0                    0   \n",
       "zinc                       0                    0                    0   \n",
       "zu                         0                    0                    0   \n",
       "\n",
       "         18571126-13-7142543  18580710-13-7297359  18590407-13-5679082  \\\n",
       "00                         0                    3                    3   \n",
       "000                        3                   25                   12   \n",
       "001                        0                    0                    1   \n",
       "009                        0                    0                    0   \n",
       "01                         0                    0                    0   \n",
       "...                      ...                  ...                  ...   \n",
       "yy                         0                    1                    0   \n",
       "zealand                    0                    4                    1   \n",
       "zeehan                     0                    0                    0   \n",
       "zinc                       0                    3                    3   \n",
       "zu                         0                    0                    0   \n",
       "\n",
       "         18590520-13-5681431  18590524-809-154839403  18590812-67-60405583  \\\n",
       "00                         4                       1                     0   \n",
       "000                       35                       8                     0   \n",
       "001                        0                       0                     0   \n",
       "009                        0                       0                     0   \n",
       "01                         1                       1                     0   \n",
       "...                      ...                     ...                   ...   \n",
       "yy                         0                       1                     0   \n",
       "zealand                    1                       1                     0   \n",
       "zeehan                     0                       0                     0   \n",
       "zinc                       0                       0                     0   \n",
       "zu                         0                       0                     0   \n",
       "\n",
       "         18640227-13-5744865  ...  19530604-97-62492704  19530822-35-18381792  \\\n",
       "00                         5  ...                     0                    13   \n",
       "000                       18  ...                     0                   262   \n",
       "001                        0  ...                     0                     0   \n",
       "009                        0  ...                     0                     0   \n",
       "01                         1  ...                     0                     6   \n",
       "...                      ...  ...                   ...                   ...   \n",
       "yy                         0  ...                     0                     0   \n",
       "zealand                    6  ...                     0                     1   \n",
       "zeehan                     0  ...                     0                     0   \n",
       "zinc                       0  ...                     0                     4   \n",
       "zu                         0  ...                     0                     0   \n",
       "\n",
       "         19531009-687-145667588  19531015-379-100665477  19540424-77-57316830  \\\n",
       "00                            0                       0                     0   \n",
       "000                           6                       0                    16   \n",
       "001                           0                       0                     0   \n",
       "009                           0                       0                     0   \n",
       "01                            0                       1                     0   \n",
       "...                         ...                     ...                   ...   \n",
       "yy                            0                       0                     0   \n",
       "zealand                       0                       0                     0   \n",
       "zeehan                        0                       0                     0   \n",
       "zinc                          0                       0                     0   \n",
       "zu                            0                       0                     0   \n",
       "\n",
       "         19541112-969-204759481  19541116-12-50619201  19541119-470-135256155  \\\n",
       "00                            0                     0                       0   \n",
       "000                           0                     0                       1   \n",
       "001                           0                     0                       0   \n",
       "009                           0                     0                       0   \n",
       "01                            0                     0                       0   \n",
       "...                         ...                   ...                     ...   \n",
       "yy                            0                     0                       0   \n",
       "zealand                       0                     0                       0   \n",
       "zeehan                        0                     0                       0   \n",
       "zinc                          0                     0                       0   \n",
       "zu                            0                     0                       0   \n",
       "\n",
       "         19870909-11-122120946  19880228-11-101979292  \n",
       "00                           0                      0  \n",
       "000                          0                      0  \n",
       "001                          0                      0  \n",
       "009                          0                      0  \n",
       "01                           0                      0  \n",
       "...                        ...                    ...  \n",
       "yy                           0                      0  \n",
       "zealand                      0                      0  \n",
       "zeehan                       0                      0  \n",
       "zinc                         0                      0  \n",
       "zu                           0                      0  \n",
       "\n",
       "[10000 rows x 3066 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_freq.T"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "## Add a 'year' column to the dataframe\n",
    "\n",
    "Each file name includes the date on which the article was published. For example, `18601224-13-5696044` was published on 24 December 1860. Because the file names form the dataframe's index, we can extract the year by taking the first four characters of each index value. The result is stored, as a string, in a new `article_year` column."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# The index holds the file names, which start with the publication date,\n",
    "# so the first four characters are the year (kept as a string)\n",
    "df_freq[\"article_year\"] = df_freq.index.str[:4]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "## Most frequent words each year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Bring together the rows (articles) that share a publication year...\n",
    "year_groups = df_freq.groupby(\"article_year\")\n",
    "# ...and add up each word's count within every year\n",
    "year_group_totals = year_groups.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Reshape from wide (one column per word) to long, giving one row per\n",
    "# word/year pair with the columns word, year, and count\n",
    "words_by_year = (\n",
    "    year_group_totals.unstack()\n",
    "    .rename_axis([\"word\", \"year\"])\n",
    "    .reset_index(name=\"count\")\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Order every word/year pair from the highest count to the lowest, then keep\n",
    "# the first ten rows found for each year, i.e. that year's ten largest counts\n",
    "top_words_by_year = (\n",
    "    words_by_year.sort_values(by=\"count\", ascending=False)\n",
    "    .groupby(\"year\")\n",
    "    .head(10)\n",
    "    .reset_index(drop=True)\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "word\n",
       "street       55\n",
       "mr           41\n",
       "st           38\n",
       "good         35\n",
       "new          31\n",
       "co           28\n",
       "rooms        24\n",
       "10           22\n",
       "mrs          21\n",
       "one          17\n",
       "apply        17\n",
       "sale         14\n",
       "years        13\n",
       "office       13\n",
       "wanted       12\n",
       "per          10\n",
       "room         10\n",
       "11           10\n",
       "house         9\n",
       "loving        8\n",
       "would         8\n",
       "brisbane      8\n",
       "may           8\n",
       "day           8\n",
       "melbourne     7\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of years in which each word made the top ten; show the 25 most common\n",
    "top_words_by_year[\"word\"].value_counts().head(25)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "## Visualise top ten words per year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-a6c421af9fd24bcc9259fc864333a18a.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-a6c421af9fd24bcc9259fc864333a18a.vega-embed details,\n",
       "  #altair-viz-a6c421af9fd24bcc9259fc864333a18a.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-a6c421af9fd24bcc9259fc864333a18a\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-a6c421af9fd24bcc9259fc864333a18a\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-a6c421af9fd24bcc9259fc864333a18a\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.14.1?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.14.1\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-d5049957a2eaa774febfc1a31090f2a2\"}, \"mark\": {\"type\": \"bar\"}, \"encoding\": {\"facet\": {\"columns\": 4, \"field\": \"year\", \"type\": \"nominal\"}, \"x\": {\"field\": \"count\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"word\", \"sort\": \"-x\", \"type\": \"nominal\"}}, \"height\": 120, \"resolve\": {\"scale\": {\"x\": \"independent\", \"y\": \"independent\"}}, \"width\": 120, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.14.1.json\", \"datasets\": {\"data-d5049957a2eaa774febfc1a31090f2a2\": [{\"word\": \"st\", \"year\": \"1924\", \"count\": 13765}, {\"word\": \"street\", \"year\": \"1902\", \"count\": 11766}, {\"word\": \"pte\", \"year\": \"1941\", \"count\": 9366}, {\"word\": \"mr\", \"year\": \"1902\", \"count\": 8925}, {\"word\": \"st\", \"year\": \"1902\", \"count\": 8883}, {\"word\": \"st\", \"year\": \"1925\", \"count\": 6473}, {\"word\": \"street\", \"year\": \"1930\", \"count\": 6126}, {\"word\": \"apply\", \"year\": \"1924\", \"count\": 5806}, {\"word\": \"co\", \"year\": \"1902\", \"count\": 5794}, {\"word\": \"rooms\", \"year\": \"1924\", \"count\": 5739}, {\"word\": \"good\", \"year\": \"1902\", \"count\": 5601}, {\"word\": \"per\", \"year\": \"1902\", \"count\": 5534}, {\"word\": \"good\", \"year\": \"1924\", \"count\": 5487}, {\"word\": \"rooms\", \"year\": \"1930\", \"count\": 5419}, {\"word\": \"new\", \"year\": \"1902\", \"count\": 5187}, {\"word\": \"street\", \"year\": \"1926\", \"count\": 5063}, {\"word\": \"st\", \"year\": \"1930\", \"count\": 5009}, {\"word\": \"street\", \"year\": \"1903\", \"count\": 4836}, {\"word\": \"mr\", \"year\": \"1903\", \"count\": 4813}, {\"word\": \"wanted\", \"year\": \"1924\", \"count\": 4802}, {\"word\": \"10\", \"year\": \"1902\", \"count\": 4722}, {\"word\": \"room\", \"year\": \"1924\", \"count\": 4528}, {\"word\": \"11\", \"year\": \"1902\", \"count\": 4392}, 
{\"word\": \"rooms\", \"year\": \"1902\", \"count\": 4347}, {\"word\": \"street\", \"year\": \"1929\", \"count\": 4320}, {\"word\": \"new\", \"year\": \"1924\", \"count\": 4286}, {\"word\": \"office\", \"year\": \"1924\", \"count\": 4220}, {\"word\": \"st\", \"year\": \"1919\", \"count\": 3994}, {\"word\": \"st\", \"year\": \"1929\", \"count\": 3931}, {\"word\": \"street\", \"year\": \"1935\", \"count\": 3874}, {\"word\": \"deposit\", \"year\": \"1924\", \"count\": 3789}, {\"word\": \"light\", \"year\": \"1924\", \"count\": 3748}, {\"word\": \"st\", \"year\": \"1903\", \"count\": 3426}, {\"word\": \"rooms\", \"year\": \"1929\", \"count\": 3321}, {\"word\": \"st\", \"year\": \"1921\", \"count\": 3186}, {\"word\": \"apply\", \"year\": \"1925\", \"count\": 3181}, {\"word\": \"good\", \"year\": \"1925\", \"count\": 3015}, {\"word\": \"room\", \"year\": \"1930\", \"count\": 2968}, {\"word\": \"kitchen\", \"year\": \"1930\", \"count\": 2930}, {\"word\": \"good\", \"year\": \"1930\", \"count\": 2919}, {\"word\": \"cottage\", \"year\": \"1930\", \"count\": 2899}, {\"word\": \"wanted\", \"year\": \"1925\", \"count\": 2879}, {\"word\": \"land\", \"year\": \"1930\", \"count\": 2775}, {\"word\": \"brick\", \"year\": \"1930\", \"count\": 2684}, {\"word\": \"office\", \"year\": \"1925\", \"count\": 2626}, {\"word\": \"new\", \"year\": \"1903\", \"count\": 2539}, {\"word\": \"co\", \"year\": \"1903\", \"count\": 2513}, {\"word\": \"street\", \"year\": \"1928\", \"count\": 2467}, {\"word\": \"room\", \"year\": \"1929\", \"count\": 2452}, {\"word\": \"good\", \"year\": \"1929\", \"count\": 2437}, {\"word\": \"good\", \"year\": \"1903\", \"count\": 2372}, {\"word\": \"per\", \"year\": \"1903\", \"count\": 2366}, {\"word\": \"new\", \"year\": \"1925\", \"count\": 2326}, {\"word\": \"rooms\", \"year\": \"1925\", \"count\": 2254}, {\"word\": \"10\", \"year\": \"1930\", \"count\": 2137}, {\"word\": \"10\", \"year\": \"1903\", \"count\": 2117}, {\"word\": \"sale\", \"year\": \"1903\", 
\"count\": 2100}, {\"word\": \"brisbane\", \"year\": \"1925\", \"count\": 2086}, {\"word\": \"st\", \"year\": \"1913\", \"count\": 2072}, {\"word\": \"house\", \"year\": \"1925\", \"count\": 2040}, {\"word\": \"apply\", \"year\": \"1926\", \"count\": 2020}, {\"word\": \"one\", \"year\": \"1903\", \"count\": 1986}, {\"word\": \"kitchen\", \"year\": \"1929\", \"count\": 1979}, {\"word\": \"deposit\", \"year\": \"1925\", \"count\": 1976}, {\"word\": \"rooms\", \"year\": \"1921\", \"count\": 1894}, {\"word\": \"brick\", \"year\": \"1929\", \"count\": 1859}, {\"word\": \"co\", \"year\": \"1929\", \"count\": 1858}, {\"word\": \"rooms\", \"year\": \"1919\", \"count\": 1848}, {\"word\": \"land\", \"year\": \"1929\", \"count\": 1809}, {\"word\": \"st\", \"year\": \"1920\", \"count\": 1780}, {\"word\": \"apply\", \"year\": \"1929\", \"count\": 1743}, {\"word\": \"st\", \"year\": \"1915\", \"count\": 1739}, {\"word\": \"good\", \"year\": \"1919\", \"count\": 1724}, {\"word\": \"good\", \"year\": \"1926\", \"count\": 1684}, {\"word\": \"office\", \"year\": \"1926\", \"count\": 1654}, {\"word\": \"st\", \"year\": \"1922\", \"count\": 1639}, {\"word\": \"wanted\", \"year\": \"1926\", \"count\": 1631}, {\"word\": \"good\", \"year\": \"1935\", \"count\": 1553}, {\"word\": \"rooms\", \"year\": \"1926\", \"count\": 1535}, {\"word\": \"street\", \"year\": \"1921\", \"count\": 1494}, {\"word\": \"wanted\", \"year\": \"1935\", \"count\": 1449}, {\"word\": \"apply\", \"year\": \"1919\", \"count\": 1441}, {\"word\": \"house\", \"year\": \"1926\", \"count\": 1437}, {\"word\": \"good\", \"year\": \"1921\", \"count\": 1433}, {\"word\": \"room\", \"year\": \"1926\", \"count\": 1418}, {\"word\": \"mr\", \"year\": \"1913\", \"count\": 1413}, {\"word\": \"new\", \"year\": \"1935\", \"count\": 1351}, {\"word\": \"room\", \"year\": \"1928\", \"count\": 1336}, {\"word\": \"telegraph\", \"year\": \"1935\", \"count\": 1326}, {\"word\": \"new\", \"year\": \"1926\", \"count\": 1302}, {\"word\": 
\"light\", \"year\": \"1926\", \"count\": 1258}, {\"word\": \"sedan\", \"year\": \"1953\", \"count\": 1256}, {\"word\": \"street\", \"year\": \"1901\", \"count\": 1254}, {\"word\": \"street\", \"year\": \"1913\", \"count\": 1245}, {\"word\": \"rooms\", \"year\": \"1928\", \"count\": 1240}, {\"word\": \"room\", \"year\": \"1921\", \"count\": 1222}, {\"word\": \"apply\", \"year\": \"1921\", \"count\": 1219}, {\"word\": \"rooms\", \"year\": \"1913\", \"count\": 1195}, {\"word\": \"terms\", \"year\": \"1935\", \"count\": 1177}, {\"word\": \"st\", \"year\": \"1923\", \"count\": 1172}, {\"word\": \"st\", \"year\": \"1953\", \"count\": 1138}, {\"word\": \"sale\", \"year\": \"1919\", \"count\": 1119}, {\"word\": \"office\", \"year\": \"1919\", \"count\": 1113}, {\"word\": \"wanted\", \"year\": \"1921\", \"count\": 1110}, {\"word\": \"new\", \"year\": \"1919\", \"count\": 1097}, {\"word\": \"good\", \"year\": \"1953\", \"count\": 1088}, {\"word\": \"6d\", \"year\": \"1935\", \"count\": 1076}, {\"word\": \"new\", \"year\": \"1921\", \"count\": 1058}, {\"word\": \"brisbane\", \"year\": \"1935\", \"count\": 1029}, {\"word\": \"house\", \"year\": \"1919\", \"count\": 1028}, {\"word\": \"phone\", \"year\": \"1935\", \"count\": 998}, {\"word\": \"good\", \"year\": \"1928\", \"count\": 995}, {\"word\": \"rooms\", \"year\": \"1922\", \"count\": 990}, {\"word\": \"house\", \"year\": \"1935\", \"count\": 984}, {\"word\": \"private\", \"year\": \"1915\", \"count\": 983}, {\"word\": \"street\", \"year\": \"1915\", \"count\": 981}, {\"word\": \"office\", \"year\": \"1921\", \"count\": 978}, {\"word\": \"new\", \"year\": \"1953\", \"count\": 969}, {\"word\": \"water\", \"year\": \"1919\", \"count\": 956}, {\"word\": \"st\", \"year\": \"1901\", \"count\": 956}, {\"word\": \"wanted\", \"year\": \"1919\", \"count\": 950}, {\"word\": \"mr\", \"year\": \"1906\", \"count\": 949}, {\"word\": \"cs\", \"year\": \"1950\", \"count\": 923}, {\"word\": \"mr\", \"year\": \"1914\", \"count\": 914}, 
{\"word\": \"street\", \"year\": \"1906\", \"count\": 913}, {\"word\": \"street\", \"year\": \"1922\", \"count\": 907}, {\"word\": \"street\", \"year\": \"1905\", \"count\": 899}, {\"word\": \"good\", \"year\": \"1922\", \"count\": 894}, {\"word\": \"room\", \"year\": \"1922\", \"count\": 890}, {\"word\": \"street\", \"year\": \"1938\", \"count\": 883}, {\"word\": \"wanted\", \"year\": \"1922\", \"count\": 882}, {\"word\": \"wanted\", \"year\": \"1920\", \"count\": 875}, {\"word\": \"deposit\", \"year\": \"1921\", \"count\": 872}, {\"word\": \"apply\", \"year\": \"1928\", \"count\": 865}, {\"word\": \"mr\", \"year\": \"1901\", \"count\": 853}, {\"word\": \"street\", \"year\": \"1933\", \"count\": 846}, {\"word\": \"battalion\", \"year\": \"1915\", \"count\": 842}, {\"word\": \"good\", \"year\": \"1913\", \"count\": 836}, {\"word\": \"land\", \"year\": \"1928\", \"count\": 828}, {\"word\": \"rooms\", \"year\": \"1915\", \"count\": 825}, {\"word\": \"rooms\", \"year\": \"1920\", \"count\": 822}, {\"word\": \"apply\", \"year\": \"1922\", \"count\": 816}, {\"word\": \"apply\", \"year\": \"1920\", \"count\": 813}, {\"word\": \"10\", \"year\": \"1913\", \"count\": 809}, {\"word\": \"gold\", \"year\": \"1901\", \"count\": 790}, {\"word\": \"kitchen\", \"year\": \"1928\", \"count\": 778}, {\"word\": \"mr\", \"year\": \"1904\", \"count\": 776}, {\"word\": \"brick\", \"year\": \"1928\", \"count\": 769}, {\"word\": \"good\", \"year\": \"1920\", \"count\": 768}, {\"word\": \"street\", \"year\": \"1927\", \"count\": 760}, {\"word\": \"der\", \"year\": \"1910\", \"count\": 754}, {\"word\": \"new\", \"year\": \"1901\", \"count\": 736}, {\"word\": \"mrs\", \"year\": \"1913\", \"count\": 736}, {\"word\": \"good\", \"year\": \"1923\", \"count\": 722}, {\"word\": \"good\", \"year\": \"1915\", \"count\": 721}, {\"word\": \"street\", \"year\": \"1939\", \"count\": 698}, {\"word\": \"street\", \"year\": \"1931\", \"count\": 687}, {\"word\": \"inf\", \"year\": \"1941\", \"count\": 686}, 
{\"word\": \"co\", \"year\": \"1928\", \"count\": 685}, {\"word\": \"road\", \"year\": \"1928\", \"count\": 685}, {\"word\": \"good\", \"year\": \"1901\", \"count\": 683}, {\"word\": \"co\", \"year\": \"1901\", \"count\": 677}, {\"word\": \"house\", \"year\": \"1913\", \"count\": 668}, {\"word\": \"per\", \"year\": \"1901\", \"count\": 659}, {\"word\": \"ford\", \"year\": \"1953\", \"count\": 652}, {\"word\": \"apply\", \"year\": \"1915\", \"count\": 650}, {\"word\": \"st\", \"year\": \"1906\", \"count\": 643}, {\"word\": \"may\", \"year\": \"1913\", \"count\": 637}, {\"word\": \"street\", \"year\": \"1914\", \"count\": 634}, {\"word\": \"sale\", \"year\": \"1901\", \"count\": 616}, {\"word\": \"10\", \"year\": \"1901\", \"count\": 607}, {\"word\": \"rd\", \"year\": \"1953\", \"count\": 605}, {\"word\": \"adelaide\", \"year\": \"1913\", \"count\": 599}, {\"word\": \"co\", \"year\": \"1922\", \"count\": 599}, {\"word\": \"die\", \"year\": \"1910\", \"count\": 592}, {\"word\": \"mr\", \"year\": \"1910\", \"count\": 589}, {\"word\": \"st\", \"year\": \"1932\", \"count\": 589}, {\"word\": \"st\", \"year\": \"1910\", \"count\": 588}, {\"word\": \"car\", \"year\": \"1953\", \"count\": 586}, {\"word\": \"reg\", \"year\": \"1953\", \"count\": 585}, {\"word\": \"new\", \"year\": \"1922\", \"count\": 583}, {\"word\": \"st\", \"year\": \"1911\", \"count\": 578}, {\"word\": \"room\", \"year\": \"1915\", \"count\": 569}, {\"word\": \"room\", \"year\": \"1920\", \"count\": 568}, {\"word\": \"mr\", \"year\": \"1907\", \"count\": 568}, {\"word\": \"mr\", \"year\": \"1912\", \"count\": 565}, {\"word\": \"rooms\", \"year\": \"1923\", \"count\": 563}, {\"word\": \"rooms\", \"year\": \"1938\", \"count\": 552}, {\"word\": \"co\", \"year\": \"1906\", \"count\": 551}, {\"word\": \"10\", \"year\": \"1953\", \"count\": 549}, {\"word\": \"cs\", \"year\": \"1941\", \"count\": 534}, {\"word\": \"new\", \"year\": \"1939\", \"count\": 531}, {\"word\": \"mr\", \"year\": \"1923\", \"count\": 
523}, {\"word\": \"street\", \"year\": \"1953\", \"count\": 520}, {\"word\": \"rain\", \"year\": \"1937\", \"count\": 518}, {\"word\": \"good\", \"year\": \"1911\", \"count\": 507}, {\"word\": \"und\", \"year\": \"1910\", \"count\": 497}, {\"word\": \"house\", \"year\": \"1920\", \"count\": 494}, {\"word\": \"brisbane\", \"year\": \"1920\", \"count\": 491}, {\"word\": \"february\", \"year\": \"1938\", \"count\": 488}, {\"word\": \"years\", \"year\": \"1910\", \"count\": 486}, {\"word\": \"st\", \"year\": \"1905\", \"count\": 485}, {\"word\": \"mr\", \"year\": \"1908\", \"count\": 484}, {\"word\": \"office\", \"year\": \"1923\", \"count\": 481}, {\"word\": \"mr\", \"year\": \"1911\", \"count\": 480}, {\"word\": \"would\", \"year\": \"1914\", \"count\": 479}, {\"word\": \"st\", \"year\": \"1931\", \"count\": 473}, {\"word\": \"wanted\", \"year\": \"1923\", \"count\": 472}, {\"word\": \"light\", \"year\": \"1922\", \"count\": 472}, {\"word\": \"loving\", \"year\": \"1938\", \"count\": 469}, {\"word\": \"john\", \"year\": \"1855\", \"count\": 463}, {\"word\": \"street\", \"year\": \"1911\", \"count\": 463}, {\"word\": \"new\", \"year\": \"1923\", \"count\": 461}, {\"word\": \"office\", \"year\": \"1920\", \"count\": 459}, {\"word\": \"wanted\", \"year\": \"1911\", \"count\": 458}, {\"word\": \"new\", \"year\": \"1915\", \"count\": 456}, {\"word\": \"years\", \"year\": \"1923\", \"count\": 456}, {\"word\": \"wx\", \"year\": \"1941\", \"count\": 455}, {\"word\": \"apply\", \"year\": \"1923\", \"count\": 454}, {\"word\": \"cottage\", \"year\": \"1915\", \"count\": 453}, {\"word\": \"st\", \"year\": \"1904\", \"count\": 452}, {\"word\": \"st\", \"year\": \"1939\", \"count\": 447}, {\"word\": \"street\", \"year\": \"1923\", \"count\": 446}, {\"word\": \"sale\", \"year\": \"1920\", \"count\": 444}, {\"word\": \"street\", \"year\": \"1918\", \"count\": 440}, {\"word\": \"good\", \"year\": \"1914\", \"count\": 439}, {\"word\": \"good\", \"year\": \"1931\", \"count\": 435}, 
{\"word\": \"10\", \"year\": \"1904\", \"count\": 434}, {\"word\": \"perth\", \"year\": \"1906\", \"count\": 430}, {\"word\": \"10\", \"year\": \"1939\", \"count\": 426}, {\"word\": \"apply\", \"year\": \"1911\", \"count\": 426}, {\"word\": \"st\", \"year\": \"1912\", \"count\": 424}, {\"word\": \"10\", \"year\": \"1905\", \"count\": 423}, {\"word\": \"john\", \"year\": \"1908\", \"count\": 409}, {\"word\": \"ltd\", \"year\": \"1950\", \"count\": 408}, {\"word\": \"brick\", \"year\": \"1938\", \"count\": 397}, {\"word\": \"land\", \"year\": \"1938\", \"count\": 397}, {\"word\": \"rooms\", \"year\": \"1931\", \"count\": 397}, {\"word\": \"street\", \"year\": \"1859\", \"count\": 396}, {\"word\": \"sale\", \"year\": \"1906\", \"count\": 393}, {\"word\": \"good\", \"year\": \"1906\", \"count\": 393}, {\"word\": \"street\", \"year\": \"1932\", \"count\": 391}, {\"word\": \"one\", \"year\": \"1904\", \"count\": 391}, {\"word\": \"street\", \"year\": \"1908\", \"count\": 383}, {\"word\": \"per\", \"year\": \"1904\", \"count\": 380}, {\"word\": \"st\", \"year\": \"1938\", \"count\": 375}, {\"word\": \"11\", \"year\": \"1908\", \"count\": 373}, {\"word\": \"road\", \"year\": \"1938\", \"count\": 373}, {\"word\": \"apply\", \"year\": \"1906\", \"count\": 369}, {\"word\": \"city\", \"year\": \"1918\", \"count\": 368}, {\"word\": \"mrs\", \"year\": \"1906\", \"count\": 368}, {\"word\": \"rooms\", \"year\": \"1906\", \"count\": 367}, {\"word\": \"mrs\", \"year\": \"1905\", \"count\": 367}, {\"word\": \"good\", \"year\": \"1938\", \"count\": 367}, {\"word\": \"11\", \"year\": \"1911\", \"count\": 365}, {\"word\": \"may\", \"year\": \"1912\", \"count\": 361}, {\"word\": \"mr\", \"year\": \"1938\", \"count\": 359}, {\"word\": \"co\", \"year\": \"1904\", \"count\": 358}, {\"word\": \"years\", \"year\": \"1914\", \"count\": 355}, {\"word\": \"co\", \"year\": \"1939\", \"count\": 354}, {\"word\": \"street\", \"year\": \"1904\", \"count\": 354}, {\"word\": \"wm\", \"year\": \"1855\", 
\"count\": 353}, {\"word\": \"would\", \"year\": \"1904\", \"count\": 346}, {\"word\": \"rooms\", \"year\": \"1932\", \"count\": 345}, {\"word\": \"10\", \"year\": \"1912\", \"count\": 341}, {\"word\": \"11\", \"year\": \"1904\", \"count\": 339}, {\"word\": \"year\", \"year\": \"1914\", \"count\": 336}, {\"word\": \"new\", \"year\": \"1905\", \"count\": 334}, {\"word\": \"new\", \"year\": \"1933\", \"count\": 329}, {\"word\": \"adelaide\", \"year\": \"1911\", \"count\": 326}, {\"word\": \"rooms\", \"year\": \"1939\", \"count\": 323}, {\"word\": \"mrs\", \"year\": \"1914\", \"count\": 320}, {\"word\": \"street\", \"year\": \"1916\", \"count\": 320}, {\"word\": \"loving\", \"year\": \"1937\", \"count\": 319}, {\"word\": \"good\", \"year\": \"1939\", \"count\": 314}, {\"word\": \"years\", \"year\": \"1911\", \"count\": 312}, {\"word\": \"street\", \"year\": \"1936\", \"count\": 311}, {\"word\": \"co\", \"year\": \"1905\", \"count\": 310}, {\"word\": \"mrs\", \"year\": \"1911\", \"count\": 309}, {\"word\": \"good\", \"year\": \"1910\", \"count\": 307}, {\"word\": \"good\", \"year\": \"1933\", \"count\": 303}, {\"word\": \"rooms\", \"year\": \"1905\", \"count\": 303}, {\"word\": \"rev\", \"year\": \"1914\", \"count\": 300}, {\"word\": \"one\", \"year\": \"1912\", \"count\": 299}, {\"word\": \"year\", \"year\": \"1904\", \"count\": 298}, {\"word\": \"drought\", \"year\": \"1914\", \"count\": 297}, {\"word\": \"ltd\", \"year\": \"1939\", \"count\": 297}, {\"word\": \"two\", \"year\": \"1912\", \"count\": 294}, {\"word\": \"sydney\", \"year\": \"1939\", \"count\": 294}, {\"word\": \"rooms\", \"year\": \"1916\", \"count\": 293}, {\"word\": \"st\", \"year\": \"1908\", \"count\": 291}, {\"word\": \"new\", \"year\": \"1931\", \"count\": 290}, {\"word\": \"10\", \"year\": \"1932\", \"count\": 290}, {\"word\": \"street\", \"year\": \"1912\", \"count\": 289}, {\"word\": \"william\", \"year\": \"1908\", \"count\": 289}, {\"word\": \"day\", \"year\": \"1939\", \"count\": 287}, 
{\"word\": \"mr\", \"year\": \"1905\", \"count\": 287}, {\"word\": \"street\", \"year\": \"1910\", \"count\": 286}, {\"word\": \"one\", \"year\": \"1914\", \"count\": 285}, {\"word\": \"price\", \"year\": \"1931\", \"count\": 285}, {\"word\": \"10\", \"year\": \"1910\", \"count\": 285}, {\"word\": \"bgs\", \"year\": \"1941\", \"count\": 282}, {\"word\": \"good\", \"year\": \"1905\", \"count\": 280}, {\"word\": \"10\", \"year\": \"1931\", \"count\": 276}, {\"word\": \"james\", \"year\": \"1855\", \"count\": 273}, {\"word\": \"loving\", \"year\": \"1945\", \"count\": 271}, {\"word\": \"new\", \"year\": \"1932\", \"count\": 271}, {\"word\": \"ltd\", \"year\": \"1941\", \"count\": 269}, {\"word\": \"von\", \"year\": \"1910\", \"count\": 267}, {\"word\": \"time\", \"year\": \"1907\", \"count\": 265}, {\"word\": \"bgs\", \"year\": \"1950\", \"count\": 265}, {\"word\": \"mr\", \"year\": \"1937\", \"count\": 264}, {\"word\": \"cottage\", \"year\": \"1931\", \"count\": 260}, {\"word\": \"position\", \"year\": \"1931\", \"count\": 258}, {\"word\": \"co\", \"year\": \"1941\", \"count\": 256}, {\"word\": \"10\", \"year\": \"1908\", \"count\": 254}, {\"word\": \"apply\", \"year\": \"1905\", \"count\": 251}, {\"word\": \"office\", \"year\": \"1933\", \"count\": 247}, {\"word\": \"street\", \"year\": \"1917\", \"count\": 246}, {\"word\": \"sydney\", \"year\": \"1932\", \"count\": 245}, {\"word\": \"good\", \"year\": \"1907\", \"count\": 243}, {\"word\": \"wanted\", \"year\": \"1933\", \"count\": 242}, {\"word\": \"mrs\", \"year\": \"1908\", \"count\": 241}, {\"word\": \"apply\", \"year\": \"1927\", \"count\": 239}, {\"word\": \"co\", \"year\": \"1931\", \"count\": 239}, {\"word\": \"co\", \"year\": \"1950\", \"count\": 238}, {\"word\": \"mrs\", \"year\": \"1927\", \"count\": 238}, {\"word\": \"march\", \"year\": \"1946\", \"count\": 235}, {\"word\": \"ditto\", \"year\": \"1936\", \"count\": 234}, {\"word\": \"loved\", \"year\": \"1945\", \"count\": 233}, {\"word\": \"mrs\", 
\"year\": \"1855\", \"count\": 231}, {\"word\": \"good\", \"year\": \"1927\", \"count\": 229}, {\"word\": \"james\", \"year\": \"1908\", \"count\": 229}, {\"word\": \"good\", \"year\": \"1912\", \"count\": 228}, {\"word\": \"last\", \"year\": \"1912\", \"count\": 227}, {\"word\": \"loving\", \"year\": \"1909\", \"count\": 226}, {\"word\": \"mr\", \"year\": \"1898\", \"count\": 225}, {\"word\": \"mr\", \"year\": \"1909\", \"count\": 225}, {\"word\": \"son\", \"year\": \"1908\", \"count\": 223}, {\"word\": \"day\", \"year\": \"1912\", \"count\": 222}, {\"word\": \"furnished\", \"year\": \"1932\", \"count\": 222}, {\"word\": \"last\", \"year\": \"1907\", \"count\": 222}, {\"word\": \"june\", \"year\": \"1937\", \"count\": 220}, {\"word\": \"room\", \"year\": \"1927\", \"count\": 220}, {\"word\": \"day\", \"year\": \"1932\", \"count\": 218}, {\"word\": \"st\", \"year\": \"1916\", \"count\": 218}, {\"word\": \"years\", \"year\": \"1909\", \"count\": 216}, {\"word\": \"phone\", \"year\": \"1933\", \"count\": 209}, {\"word\": \"water\", \"year\": \"1932\", \"count\": 208}, {\"word\": \"one\", \"year\": \"1907\", \"count\": 207}, {\"word\": \"clouds\", \"year\": \"1937\", \"count\": 206}, {\"word\": \"years\", \"year\": \"1907\", \"count\": 205}, {\"word\": \"6d\", \"year\": \"1933\", \"count\": 203}, {\"word\": \"rooms\", \"year\": \"1927\", \"count\": 202}, {\"word\": \"office\", \"year\": \"1927\", \"count\": 201}, {\"word\": \"st\", \"year\": \"1894\", \"count\": 200}, {\"word\": \"phone\", \"year\": \"1932\", \"count\": 200}, {\"word\": \"brisbane\", \"year\": \"1933\", \"count\": 196}, {\"word\": \"pty\", \"year\": \"1950\", \"count\": 194}, {\"word\": \"loving\", \"year\": \"1946\", \"count\": 193}, {\"word\": \"mrs\", \"year\": \"1952\", \"count\": 192}, {\"word\": \"st\", \"year\": \"1918\", \"count\": 190}, {\"word\": \"perth\", \"year\": \"1941\", \"count\": 188}, {\"word\": \"st\", \"year\": \"1917\", \"count\": 188}, {\"word\": \"new\", \"year\": \"1927\", 
\"count\": 187}, {\"word\": \"april\", \"year\": \"1909\", \"count\": 186}, {\"word\": \"st\", \"year\": \"1893\", \"count\": 185}, {\"word\": \"rains\", \"year\": \"1937\", \"count\": 184}, {\"word\": \"australia\", \"year\": \"1937\", \"count\": 183}, {\"word\": \"dear\", \"year\": \"1909\", \"count\": 182}, {\"word\": \"shippers\", \"year\": \"1941\", \"count\": 181}, {\"word\": \"shippers\", \"year\": \"1950\", \"count\": 181}, {\"word\": \"mrs\", \"year\": \"1945\", \"count\": 181}, {\"word\": \"furnished\", \"year\": \"1933\", \"count\": 181}, {\"word\": \"good\", \"year\": \"1916\", \"count\": 179}, {\"word\": \"street\", \"year\": \"1909\", \"count\": 178}, {\"word\": \"street\", \"year\": \"1866\", \"count\": 178}, {\"word\": \"died\", \"year\": \"1945\", \"count\": 177}, {\"word\": \"house\", \"year\": \"1927\", \"count\": 177}, {\"word\": \"mr\", \"year\": \"1927\", \"count\": 175}, {\"word\": \"south\", \"year\": \"1937\", \"count\": 175}, {\"word\": \"deceased\", \"year\": \"1933\", \"count\": 175}, {\"word\": \"memory\", \"year\": \"1945\", \"count\": 171}, {\"word\": \"co\", \"year\": \"1916\", \"count\": 171}, {\"word\": \"guns\", \"year\": \"1937\", \"count\": 171}, {\"word\": \"memory\", \"year\": \"1937\", \"count\": 170}, {\"word\": \"thos\", \"year\": \"1855\", \"count\": 166}, {\"word\": \"mr\", \"year\": \"1895\", \"count\": 166}, {\"word\": \"geo\", \"year\": \"1855\", \"count\": 166}, {\"word\": \"new\", \"year\": \"1907\", \"count\": 165}, {\"word\": \"house\", \"year\": \"1916\", \"count\": 165}, {\"word\": \"land\", \"year\": \"1916\", \"count\": 164}, {\"word\": \"ctns\", \"year\": \"1950\", \"count\": 162}, {\"word\": \"street\", \"year\": \"1893\", \"count\": 162}, {\"word\": \"st\", \"year\": \"1907\", \"count\": 161}, {\"word\": \"november\", \"year\": \"1945\", \"count\": 161}, {\"word\": \"thomas\", \"year\": \"1855\", \"count\": 161}, {\"word\": \"would\", \"year\": \"1907\", \"count\": 159}, {\"word\": \"adelaide\", \"year\": 
\"1916\", \"count\": 157}, {\"word\": \"february\", \"year\": \"1952\", \"count\": 157}, {\"word\": \"mrs\", \"year\": \"1941\", \"count\": 155}, {\"word\": \"sub\", \"year\": \"1936\", \"count\": 152}, {\"word\": \"por\", \"year\": \"1936\", \"count\": 151}, {\"word\": \"brother\", \"year\": \"1945\", \"count\": 148}, {\"word\": \"1945\", \"year\": \"1945\", \"count\": 148}, {\"word\": \"co\", \"year\": \"1859\", \"count\": 147}, {\"word\": \"loving\", \"year\": \"1942\", \"count\": 146}, {\"word\": \"loved\", \"year\": \"1952\", \"count\": 145}, {\"word\": \"henry\", \"year\": \"1855\", \"count\": 145}, {\"word\": \"per\", \"year\": \"1865\", \"count\": 145}, {\"word\": \"late\", \"year\": \"1952\", \"count\": 143}, {\"word\": \"gold\", \"year\": \"1890\", \"count\": 142}, {\"word\": \"memory\", \"year\": \"1946\", \"count\": 142}, {\"word\": \"street\", \"year\": \"1907\", \"count\": 140}, {\"word\": \"miss\", \"year\": \"1855\", \"count\": 140}, {\"word\": \"st\", \"year\": \"1909\", \"count\": 140}, {\"word\": \"jno\", \"year\": \"1855\", \"count\": 138}, {\"word\": \"street\", \"year\": \"1898\", \"count\": 137}, {\"word\": \"estate\", \"year\": \"1890\", \"count\": 137}, {\"word\": \"borneo\", \"year\": \"1945\", \"count\": 136}, {\"word\": \"side\", \"year\": \"1866\", \"count\": 136}, {\"word\": \"rooms\", \"year\": \"1918\", \"count\": 136}, {\"word\": \"mr\", \"year\": \"1936\", \"count\": 136}, {\"word\": \"mr\", \"year\": \"1899\", \"count\": 135}, {\"word\": \"office\", \"year\": \"1916\", \"count\": 134}, {\"word\": \"10\", \"year\": \"1916\", \"count\": 134}, {\"word\": \"son\", \"year\": \"1945\", \"count\": 134}, {\"word\": \"son\", \"year\": \"1909\", \"count\": 132}, {\"word\": \"gold\", \"year\": \"1889\", \"count\": 131}, {\"word\": \"loving\", \"year\": \"1952\", \"count\": 130}, {\"word\": \"rain\", \"year\": \"1936\", \"count\": 129}, {\"word\": \"good\", \"year\": \"1918\", \"count\": 129}, {\"word\": \"daughter\", \"year\": \"1909\", 
\"count\": 129}, {\"word\": \"10\", \"year\": \"1899\", \"count\": 128}, {\"word\": \"july\", \"year\": \"1858\", \"count\": 125}, {\"word\": \"mrs\", \"year\": \"1909\", \"count\": 124}, {\"word\": \"sale\", \"year\": \"1859\", \"count\": 122}, {\"word\": \"february\", \"year\": \"1942\", \"count\": 121}, {\"word\": \"brisbane\", \"year\": \"1936\", \"count\": 118}, {\"word\": \"sydney\", \"year\": \"1898\", \"count\": 118}, {\"word\": \"mr\", \"year\": \"1952\", \"count\": 117}, {\"word\": \"street\", \"year\": \"1881\", \"count\": 116}, {\"word\": \"brisbane\", \"year\": \"1917\", \"count\": 116}, {\"word\": \"dear\", \"year\": \"1946\", \"count\": 115}, {\"word\": \"co\", \"year\": \"1918\", \"count\": 114}, {\"word\": \"april\", \"year\": \"1951\", \"count\": 114}, {\"word\": \"mr\", \"year\": \"1942\", \"count\": 114}, {\"word\": \"10\", \"year\": \"1917\", \"count\": 114}, {\"word\": \"street\", \"year\": \"1952\", \"count\": 113}, {\"word\": \"march\", \"year\": \"1876\", \"count\": 113}, {\"word\": \"clock\", \"year\": \"1871\", \"count\": 110}, {\"word\": \"mr\", \"year\": \"1865\", \"count\": 110}, {\"word\": \"new\", \"year\": \"1918\", \"count\": 109}, {\"word\": \"mrs\", \"year\": \"1946\", \"count\": 109}, {\"word\": \"lever\", \"year\": \"1889\", \"count\": 108}, {\"word\": \"loved\", \"year\": \"1946\", \"count\": 107}, {\"word\": \"furnished\", \"year\": \"1936\", \"count\": 107}, {\"word\": \"wragge\", \"year\": \"1936\", \"count\": 106}, {\"word\": \"mrs\", \"year\": \"1948\", \"count\": 106}, {\"word\": \"od\", \"year\": \"1865\", \"count\": 106}, {\"word\": \"str\", \"year\": \"1899\", \"count\": 106}, {\"word\": \"son\", \"year\": \"1952\", \"count\": 105}, {\"word\": \"motor\", \"year\": \"1917\", \"count\": 104}, {\"word\": \"sun\", \"year\": \"1918\", \"count\": 104}, {\"word\": \"street\", \"year\": \"1894\", \"count\": 102}, {\"word\": \"home\", \"year\": \"1952\", \"count\": 100}, {\"word\": \"east\", \"year\": \"1859\", \"count\": 
100}, {\"word\": \"mrs\", \"year\": \"1942\", \"count\": 100}, {\"word\": \"road\", \"year\": \"1936\", \"count\": 100}, {\"word\": \"albert\", \"year\": \"1889\", \"count\": 100}, {\"word\": \"street\", \"year\": \"1871\", \"count\": 98}, {\"word\": \"ring\", \"year\": \"1889\", \"count\": 97}, {\"word\": \"july\", \"year\": \"1948\", \"count\": 97}, {\"word\": \"collins\", \"year\": \"1859\", \"count\": 97}, {\"word\": \"gold\", \"year\": \"1868\", \"count\": 97}, {\"word\": \"inclusive\", \"year\": \"1918\", \"count\": 96}, {\"word\": \"new\", \"year\": \"1898\", \"count\": 96}, {\"word\": \"st\", \"year\": \"1952\", \"count\": 96}, {\"word\": \"mother\", \"year\": \"1946\", \"count\": 94}, {\"word\": \"street\", \"year\": \"1890\", \"count\": 94}, {\"word\": \"may\", \"year\": \"1898\", \"count\": 94}, {\"word\": \"co\", \"year\": \"1893\", \"count\": 94}, {\"word\": \"street\", \"year\": \"1856\", \"count\": 93}, {\"word\": \"rain\", \"year\": \"1944\", \"count\": 93}, {\"word\": \"20\", \"year\": \"1898\", \"count\": 93}, {\"word\": \"18\", \"year\": \"1942\", \"count\": 92}, {\"word\": \"away\", \"year\": \"1946\", \"count\": 92}, {\"word\": \"000\", \"year\": \"1865\", \"count\": 92}, {\"word\": \"street\", \"year\": \"1864\", \"count\": 91}, {\"word\": \"12\", \"year\": \"1858\", \"count\": 91}, {\"word\": \"west\", \"year\": \"1859\", \"count\": 91}, {\"word\": \"good\", \"year\": \"1893\", \"count\": 91}, {\"word\": \"11\", \"year\": \"1898\", \"count\": 90}, {\"word\": \"melbourne\", \"year\": \"1859\", \"count\": 89}, {\"word\": \"mrs\", \"year\": \"1893\", \"count\": 89}, {\"word\": \"phone\", \"year\": \"1917\", \"count\": 89}, {\"word\": \"passed\", \"year\": \"1946\", \"count\": 89}, {\"word\": \"co\", \"year\": \"1917\", \"count\": 89}, {\"word\": \"xx\", \"year\": \"1894\", \"count\": 89}, {\"word\": \"others\", \"year\": \"1918\", \"count\": 88}, {\"word\": \"sks\", \"year\": \"1950\", \"count\": 86}, {\"word\": \"inserted\", \"year\": \"1946\", 
\"count\": 86}, {\"word\": \"street\", \"year\": \"1900\", \"count\": 86}, {\"word\": \"stakes\", \"year\": \"1940\", \"count\": 85}, {\"word\": \"10\", \"year\": \"1894\", \"count\": 84}, {\"word\": \"may\", \"year\": \"1859\", \"count\": 84}, {\"word\": \"new\", \"year\": \"1899\", \"count\": 84}, {\"word\": \"tenders\", \"year\": \"1859\", \"count\": 84}, {\"word\": \"beef\", \"year\": \"1950\", \"count\": 83}, {\"word\": \"sea\", \"year\": \"1899\", \"count\": 83}, {\"word\": \"one\", \"year\": \"1898\", \"count\": 83}, {\"word\": \"pianos\", \"year\": \"1881\", \"count\": 83}, {\"word\": \"mr\", \"year\": \"1934\", \"count\": 82}, {\"word\": \"street\", \"year\": \"1858\", \"count\": 82}, {\"word\": \"shipper\", \"year\": \"1950\", \"count\": 81}, {\"word\": \"11\", \"year\": \"1893\", \"count\": 81}, {\"word\": \"nov\", \"year\": \"1856\", \"count\": 81}, {\"word\": \"south\", \"year\": \"1899\", \"count\": 81}, {\"word\": \"one\", \"year\": \"1940\", \"count\": 79}, {\"word\": \"silver\", \"year\": \"1868\", \"count\": 79}, {\"word\": \"str\", \"year\": \"1898\", \"count\": 79}, {\"word\": \"new\", \"year\": \"1940\", \"count\": 79}, {\"word\": \"st\", \"year\": \"1898\", \"count\": 79}, {\"word\": \"co\", \"year\": \"1871\", \"count\": 79}, {\"word\": \"co\", \"year\": \"1894\", \"count\": 79}, {\"word\": \"son\", \"year\": \"1942\", \"count\": 78}, {\"word\": \"office\", \"year\": \"1859\", \"count\": 77}, {\"word\": \"loved\", \"year\": \"1948\", \"count\": 77}, {\"word\": \"sale\", \"year\": \"1858\", \"count\": 77}, {\"word\": \"silver\", \"year\": \"1890\", \"count\": 77}, {\"word\": \"new\", \"year\": \"1917\", \"count\": 77}, {\"word\": \"john\", \"year\": \"1899\", \"count\": 76}, {\"word\": \"land\", \"year\": \"1917\", \"count\": 75}, {\"word\": \"mr\", \"year\": \"1882\", \"count\": 75}, {\"word\": \"sale\", \"year\": \"1871\", \"count\": 75}, {\"word\": \"clock\", \"year\": \"1858\", \"count\": 75}, {\"word\": \"rooms\", \"year\": \"1893\", 
\"count\": 74}, {\"word\": \"memory\", \"year\": \"1942\", \"count\": 74}, {\"word\": \"house\", \"year\": \"1893\", \"count\": 74}, {\"word\": \"good\", \"year\": \"1894\", \"count\": 72}, {\"word\": \"melbourne\", \"year\": \"1917\", \"count\": 71}, {\"word\": \"mr\", \"year\": \"1900\", \"count\": 71}, {\"word\": \"mrs\", \"year\": \"1894\", \"count\": 71}, {\"word\": \"auction\", \"year\": \"1871\", \"count\": 71}, {\"word\": \"rain\", \"year\": \"1934\", \"count\": 70}, {\"word\": \"sydney\", \"year\": \"1899\", \"count\": 70}, {\"word\": \"10\", \"year\": \"1893\", \"count\": 70}, {\"word\": \"fine\", \"year\": \"1899\", \"count\": 70}, {\"word\": \"south\", \"year\": \"1866\", \"count\": 69}, {\"word\": \"iron\", \"year\": \"1881\", \"count\": 69}, {\"word\": \"bo\", \"year\": \"1865\", \"count\": 69}, {\"word\": \"late\", \"year\": \"1942\", \"count\": 68}, {\"word\": \"framed\", \"year\": \"1881\", \"count\": 68}, {\"word\": \"11\", \"year\": \"1899\", \"count\": 68}, {\"word\": \"years\", \"year\": \"1948\", \"count\": 67}, {\"word\": \"bales\", \"year\": \"1865\", \"count\": 67}, {\"word\": \"away\", \"year\": \"1942\", \"count\": 67}, {\"word\": \"dear\", \"year\": \"1942\", \"count\": 66}, {\"word\": \"may\", \"year\": \"1891\", \"count\": 66}, {\"word\": \"10s\", \"year\": \"1865\", \"count\": 66}, {\"word\": \"watoh\", \"year\": \"1890\", \"count\": 65}, {\"word\": \"auction\", \"year\": \"1892\", \"count\": 65}, {\"word\": \"wanted\", \"year\": \"1893\", \"count\": 65}, {\"word\": \"sell\", \"year\": \"1871\", \"count\": 65}, {\"word\": \"por\", \"year\": \"1865\", \"count\": 64}, {\"word\": \"watch\", \"year\": \"1868\", \"count\": 64}, {\"word\": \"years\", \"year\": \"1949\", \"count\": 64}, {\"word\": \"sell\", \"year\": \"1858\", \"count\": 64}, {\"word\": \"mr\", \"year\": \"1854\", \"count\": 64}, {\"word\": \"aro\", \"year\": \"1865\", \"count\": 63}, {\"word\": \"years\", \"year\": \"1940\", \"count\": 63}, {\"word\": \"terms\", \"year\": 
\"1894\", \"count\": 63}, {\"word\": \"ring\", \"year\": \"1890\", \"count\": 63}, {\"word\": \"street\", \"year\": \"1889\", \"count\": 62}, {\"word\": \"street\", \"year\": \"1872\", \"count\": 62}, {\"word\": \"geneva\", \"year\": \"1889\", \"count\": 62}, {\"word\": \"3d\", \"year\": \"1865\", \"count\": 62}, {\"word\": \"mr\", \"year\": \"1940\", \"count\": 62}, {\"word\": \"price\", \"year\": \"1894\", \"count\": 62}, {\"word\": \"6d\", \"year\": \"1891\", \"count\": 62}, {\"word\": \"north\", \"year\": \"1866\", \"count\": 61}, {\"word\": \"may\", \"year\": \"1868\", \"count\": 61}, {\"word\": \"cash\", \"year\": \"1894\", \"count\": 60}, {\"word\": \"st\", \"year\": \"1889\", \"count\": 60}, {\"word\": \"brisbane\", \"year\": \"1890\", \"count\": 59}, {\"word\": \"allotments\", \"year\": \"1890\", \"count\": 59}, {\"word\": \"co\", \"year\": \"1858\", \"count\": 58}, {\"word\": \"mr\", \"year\": \"1885\", \"count\": 58}, {\"word\": \"south\", \"year\": \"1940\", \"count\": 58}, {\"word\": \"mr\", \"year\": \"1856\", \"count\": 58}, {\"word\": \"mr\", \"year\": \"1897\", \"count\": 58}, {\"word\": \"aid\", \"year\": \"1889\", \"count\": 57}, {\"word\": \"6d\", \"year\": \"1876\", \"count\": 57}, {\"word\": \"lever\", \"year\": \"1890\", \"count\": 56}, {\"word\": \"day\", \"year\": \"1940\", \"count\": 56}, {\"word\": \"street\", \"year\": \"1892\", \"count\": 56}, {\"word\": \"auction\", \"year\": \"1858\", \"count\": 55}, {\"word\": \"sale\", \"year\": \"1890\", \"count\": 55}, {\"word\": \"al\", \"year\": \"1940\", \"count\": 55}, {\"word\": \"per\", \"year\": \"1897\", \"count\": 54}, {\"word\": \"mr\", \"year\": \"1892\", \"count\": 54}, {\"word\": \"street\", \"year\": \"1854\", \"count\": 54}, {\"word\": \"brisbane\", \"year\": \"1900\", \"count\": 53}, {\"word\": \"william\", \"year\": \"1900\", \"count\": 53}, {\"word\": \"street\", \"year\": \"1897\", \"count\": 53}, {\"word\": \"st\", \"year\": \"1948\", \"count\": 52}, {\"word\": \"october\", 
\"year\": \"1892\", \"count\": 52}, {\"word\": \"one\", \"year\": \"1934\", \"count\": 51}, {\"word\": \"east\", \"year\": \"1866\", \"count\": 51}, {\"word\": \"sil\", \"year\": \"1889\", \"count\": 51}, {\"word\": \"co\", \"year\": \"1872\", \"count\": 51}, {\"word\": \"said\", \"year\": \"1940\", \"count\": 50}, {\"word\": \"april\", \"year\": \"1871\", \"count\": 50}, {\"word\": \"late\", \"year\": \"1948\", \"count\": 50}, {\"word\": \"dress\", \"year\": \"1889\", \"count\": 50}, {\"word\": \"yesterday\", \"year\": \"1896\", \"count\": 50}, {\"word\": \"sale\", \"year\": \"1892\", \"count\": 50}, {\"word\": \"new\", \"year\": \"1895\", \"count\": 49}, {\"word\": \"new\", \"year\": \"1896\", \"count\": 49}, {\"word\": \"sandhurst\", \"year\": \"1882\", \"count\": 49}, {\"word\": \"melbourne\", \"year\": \"1872\", \"count\": 49}, {\"word\": \"bo\", \"year\": \"1872\", \"count\": 49}, {\"word\": \"11\", \"year\": \"1891\", \"count\": 49}, {\"word\": \"st\", \"year\": \"1940\", \"count\": 49}, {\"word\": \"street\", \"year\": \"1891\", \"count\": 48}, {\"word\": \"beloved\", \"year\": \"1948\", \"count\": 48}, {\"word\": \"john\", \"year\": \"1900\", \"count\": 48}, {\"word\": \"10\", \"year\": \"1866\", \"count\": 48}, {\"word\": \"company\", \"year\": \"1872\", \"count\": 48}, {\"word\": \"would\", \"year\": \"1895\", \"count\": 47}, {\"word\": \"day\", \"year\": \"1892\", \"count\": 47}, {\"word\": \"terms\", \"year\": \"1858\", \"count\": 47}, {\"word\": \"deceased\", \"year\": \"1948\", \"count\": 47}, {\"word\": \"august\", \"year\": \"1900\", \"count\": 47}, {\"word\": \"new\", \"year\": \"1897\", \"count\": 46}, {\"word\": \"time\", \"year\": \"1895\", \"count\": 46}, {\"word\": \"rain\", \"year\": \"1947\", \"count\": 46}, {\"word\": \"monday\", \"year\": \"1858\", \"count\": 46}, {\"word\": \"fat\", \"year\": \"1892\", \"count\": 46}, {\"word\": \"drought\", \"year\": \"1949\", \"count\": 46}, {\"word\": \"south\", \"year\": \"1895\", \"count\": 44}, 
{\"word\": \"west\", \"year\": \"1866\", \"count\": 44}, {\"word\": \"12\", \"year\": \"1866\", \"count\": 44}, {\"word\": \"co\", \"year\": \"1897\", \"count\": 44}, {\"word\": \"eleven\", \"year\": \"1871\", \"count\": 44}, {\"word\": \"co\", \"year\": \"1900\", \"count\": 43}, {\"word\": \"sell\", \"year\": \"1892\", \"count\": 43}, {\"word\": \"one\", \"year\": \"1897\", \"count\": 43}, {\"word\": \"south\", \"year\": \"1896\", \"count\": 43}, {\"word\": \"sale\", \"year\": \"1856\", \"count\": 43}, {\"word\": \"weather\", \"year\": \"1934\", \"count\": 43}, {\"word\": \"loving\", \"year\": \"1951\", \"count\": 43}, {\"word\": \"10\", \"year\": \"1900\", \"count\": 43}, {\"word\": \"mother\", \"year\": \"1948\", \"count\": 42}, {\"word\": \"dearly\", \"year\": \"1948\", \"count\": 41}, {\"word\": \"day\", \"year\": \"1871\", \"count\": 41}, {\"word\": \"friday\", \"year\": \"1871\", \"count\": 41}, {\"word\": \"mr\", \"year\": \"1881\", \"count\": 41}, {\"word\": \"prime\", \"year\": \"1892\", \"count\": 41}, {\"word\": \"george\", \"year\": \"1900\", \"count\": 40}, {\"word\": \"auction\", \"year\": \"1900\", \"count\": 40}, {\"word\": \"rain\", \"year\": \"1951\", \"count\": 40}, {\"word\": \"per\", \"year\": \"1872\", \"count\": 40}, {\"word\": \"esq\", \"year\": \"1866\", \"count\": 39}, {\"word\": \"years\", \"year\": \"1934\", \"count\": 39}, {\"word\": \"tenders\", \"year\": \"1864\", \"count\": 39}, {\"word\": \"10\", \"year\": \"1891\", \"count\": 39}, {\"word\": \"mr\", \"year\": \"1951\", \"count\": 39}, {\"word\": \"per\", \"year\": \"1876\", \"count\": 39}, {\"word\": \"bo\", \"year\": \"1854\", \"count\": 39}, {\"word\": \"bond\", \"year\": \"1934\", \"count\": 39}, {\"word\": \"heart\", \"year\": \"1881\", \"count\": 39}, {\"word\": \"work\", \"year\": \"1895\", \"count\": 38}, {\"word\": \"city\", \"year\": \"1872\", \"count\": 38}, {\"word\": \"mr\", \"year\": \"1896\", \"count\": 38}, {\"word\": \"new\", \"year\": \"1891\", \"count\": 38}, 
{\"word\": \"11\", \"year\": \"1892\", \"count\": 38}, {\"word\": \"11\", \"year\": \"1866\", \"count\": 38}, {\"word\": \"apply\", \"year\": \"1856\", \"count\": 37}, {\"word\": \"zeehan\", \"year\": \"1897\", \"count\": 37}, {\"word\": \"queensland\", \"year\": \"1934\", \"count\": 37}, {\"word\": \"wragge\", \"year\": \"1934\", \"count\": 36}, {\"word\": \"also\", \"year\": \"1895\", \"count\": 36}, {\"word\": \"marble\", \"year\": \"1881\", \"count\": 36}, {\"word\": \"ladies\", \"year\": \"1897\", \"count\": 36}, {\"word\": \"made\", \"year\": \"1895\", \"count\": 36}, {\"word\": \"land\", \"year\": \"1856\", \"count\": 36}, {\"word\": \"day\", \"year\": \"1897\", \"count\": 36}, {\"word\": \"last\", \"year\": \"1895\", \"count\": 36}, {\"word\": \"100\", \"year\": \"1897\", \"count\": 35}, {\"word\": \"passed\", \"year\": \"1951\", \"count\": 35}, {\"word\": \"co\", \"year\": \"1882\", \"count\": 35}, {\"word\": \"melbourne\", \"year\": \"1895\", \"count\": 35}, {\"word\": \"melbourne\", \"year\": \"1881\", \"count\": 35}, {\"word\": \"mrs\", \"year\": \"1951\", \"count\": 35}, {\"word\": \"per\", \"year\": \"1882\", \"count\": 35}, {\"word\": \"late\", \"year\": \"1951\", \"count\": 35}, {\"word\": \"mrs\", \"year\": \"1891\", \"count\": 34}, {\"word\": \"sale\", \"year\": \"1868\", \"count\": 34}, {\"word\": \"sale\", \"year\": \"1864\", \"count\": 34}, {\"word\": \"lever\", \"year\": \"1868\", \"count\": 33}, {\"word\": \"year\", \"year\": \"1934\", \"count\": 33}, {\"word\": \"tbo\", \"year\": \"1872\", \"count\": 33}, {\"word\": \"co\", \"year\": \"1854\", \"count\": 33}, {\"word\": \"geneva\", \"year\": \"1868\", \"count\": 33}, {\"word\": \"10s\", \"year\": \"1876\", \"count\": 33}, {\"word\": \"memory\", \"year\": \"1951\", \"count\": 33}, {\"word\": \"weather\", \"year\": \"1949\", \"count\": 32}, {\"word\": \"father\", \"year\": \"1951\", \"count\": 32}, {\"word\": \"loved\", \"year\": \"1951\", \"count\": 32}, {\"word\": \"last\", \"year\": 
\"1896\", \"count\": 32}, {\"word\": \"melbourne\", \"year\": \"1854\", \"count\": 31}, {\"word\": \"ditto\", \"year\": \"1868\", \"count\": 31}, {\"word\": \"made\", \"year\": \"1944\", \"count\": 31}, {\"word\": \"co\", \"year\": \"1856\", \"count\": 31}, {\"word\": \"work\", \"year\": \"1934\", \"count\": 31}, {\"word\": \"street\", \"year\": \"1868\", \"count\": 31}, {\"word\": \"office\", \"year\": \"1872\", \"count\": 31}, {\"word\": \"sydney\", \"year\": \"1896\", \"count\": 30}, {\"word\": \"ring\", \"year\": \"1868\", \"count\": 30}, {\"word\": \"water\", \"year\": \"1954\", \"count\": 30}, {\"word\": \"february\", \"year\": \"1872\", \"count\": 30}, {\"word\": \"year\", \"year\": \"1949\", \"count\": 30}, {\"word\": \"melbourne\", \"year\": \"1864\", \"count\": 29}, {\"word\": \"11d\", \"year\": \"1891\", \"count\": 29}, {\"word\": \"collins\", \"year\": \"1881\", \"count\": 29}, {\"word\": \"produce\", \"year\": \"1944\", \"count\": 29}, {\"word\": \"clouds\", \"year\": \"1944\", \"count\": 29}, {\"word\": \"black\", \"year\": \"1891\", \"count\": 29}, {\"word\": \"street\", \"year\": \"1882\", \"count\": 29}, {\"word\": \"east\", \"year\": \"1881\", \"count\": 29}, {\"word\": \"weather\", \"year\": \"1944\", \"count\": 29}, {\"word\": \"office\", \"year\": \"1864\", \"count\": 29}, {\"word\": \"pm\", \"year\": \"1876\", \"count\": 28}, {\"word\": \"near\", \"year\": \"1856\", \"count\": 28}, {\"word\": \"guns\", \"year\": \"1947\", \"count\": 28}, {\"word\": \"reduced\", \"year\": \"1891\", \"count\": 28}, {\"word\": \"one\", \"year\": \"1949\", \"count\": 27}, {\"word\": \"may\", \"year\": \"1896\", \"count\": 27}, {\"word\": \"10s\", \"year\": \"1882\", \"count\": 27}, {\"word\": \"fine\", \"year\": \"1896\", \"count\": 27}, {\"word\": \"station\", \"year\": \"1864\", \"count\": 27}, {\"word\": \"seven\", \"year\": \"1949\", \"count\": 27}, {\"word\": \"one\", \"year\": \"1944\", \"count\": 26}, {\"word\": \"per\", \"year\": \"1854\", \"count\": 26}, 
{\"word\": \"one\", \"year\": \"1896\", \"count\": 26}, {\"word\": \"experiment\", \"year\": \"1944\", \"count\": 26}, {\"word\": \"drought\", \"year\": \"1944\", \"count\": 26}, {\"word\": \"sandhurst\", \"year\": \"1876\", \"count\": 26}, {\"word\": \"let\", \"year\": \"1856\", \"count\": 26}, {\"word\": \"made\", \"year\": \"1896\", \"count\": 26}, {\"word\": \"may\", \"year\": \"1864\", \"count\": 25}, {\"word\": \"sheep\", \"year\": \"1864\", \"count\": 25}, {\"word\": \"august\", \"year\": \"1882\", \"count\": 25}, {\"word\": \"north\", \"year\": \"1949\", \"count\": 25}, {\"word\": \"co\", \"year\": \"1864\", \"count\": 25}, {\"word\": \"apply\", \"year\": \"1864\", \"count\": 25}, {\"word\": \"would\", \"year\": \"1944\", \"count\": 25}, {\"word\": \"hotel\", \"year\": \"1854\", \"count\": 24}, {\"word\": \"coast\", \"year\": \"1949\", \"count\": 24}, {\"word\": \"tribute\", \"year\": \"1876\", \"count\": 24}, {\"word\": \"company\", \"year\": \"1876\", \"count\": 24}, {\"word\": \"cattle\", \"year\": \"1870\", \"count\": 24}, {\"word\": \"day\", \"year\": \"1854\", \"count\": 23}, {\"word\": \"river\", \"year\": \"1949\", \"count\": 22}, {\"word\": \"3d\", \"year\": \"1876\", \"count\": 22}, {\"word\": \"new\", \"year\": \"1876\", \"count\": 22}, {\"word\": \"made\", \"year\": \"1947\", \"count\": 22}, {\"word\": \"nile\", \"year\": \"1949\", \"count\": 22}, {\"word\": \"store\", \"year\": \"1870\", \"count\": 22}, {\"word\": \"years\", \"year\": \"1944\", \"count\": 22}, {\"word\": \"near\", \"year\": \"1854\", \"count\": 22}, {\"word\": \"horse\", \"year\": \"1856\", \"count\": 21}, {\"word\": \"company\", \"year\": \"1882\", \"count\": 21}, {\"word\": \"air\", \"year\": \"1882\", \"count\": 21}, {\"word\": \"man\", \"year\": \"1947\", \"count\": 21}, {\"word\": \"dam\", \"year\": \"1954\", \"count\": 21}, {\"word\": \"mrs\", \"year\": \"1854\", \"count\": 21}, {\"word\": \"made\", \"year\": \"1885\", \"count\": 20}, {\"word\": \"sale\", \"year\": 
\"1882\", \"count\": 20}, {\"word\": \"one\", \"year\": \"1947\", \"count\": 19}, {\"word\": \"machine\", \"year\": \"1857\", \"count\": 19}, {\"word\": \"fat\", \"year\": \"1870\", \"count\": 19}, {\"word\": \"byrnes\", \"year\": \"1885\", \"count\": 19}, {\"word\": \"ward\", \"year\": \"1867\", \"count\": 18}, {\"word\": \"years\", \"year\": \"1947\", \"count\": 18}, {\"word\": \"new\", \"year\": \"1885\", \"count\": 18}, {\"word\": \"weather\", \"year\": \"1947\", \"count\": 18}, {\"word\": \"board\", \"year\": \"1885\", \"count\": 17}, {\"word\": \"sheep\", \"year\": \"1870\", \"count\": 17}, {\"word\": \"jones\", \"year\": \"1954\", \"count\": 17}, {\"word\": \"company\", \"year\": \"1885\", \"count\": 17}, {\"word\": \"meeting\", \"year\": \"1885\", \"count\": 17}, {\"word\": \"would\", \"year\": \"1885\", \"count\": 17}, {\"word\": \"mr\", \"year\": \"1954\", \"count\": 17}, {\"word\": \"000\", \"year\": \"1954\", \"count\": 17}, {\"word\": \"experiment\", \"year\": \"1947\", \"count\": 16}, {\"word\": \"weather\", \"year\": \"1954\", \"count\": 16}, {\"word\": \"government\", \"year\": \"1885\", \"count\": 16}, {\"word\": \"australia\", \"year\": \"1947\", \"count\": 15}, {\"word\": \"smith\", \"year\": \"1867\", \"count\": 15}, {\"word\": \"association\", \"year\": \"1885\", \"count\": 14}, {\"word\": \"first\", \"year\": \"1947\", \"count\": 14}, {\"word\": \"company\", \"year\": \"1954\", \"count\": 14}, {\"word\": \"union\", \"year\": \"1943\", \"count\": 13}, {\"word\": \"ditto\", \"year\": \"1870\", \"count\": 12}, {\"word\": \"would\", \"year\": \"1954\", \"count\": 12}, {\"word\": \"week\", \"year\": \"1943\", \"count\": 12}, {\"word\": \"drought\", \"year\": \"1954\", \"count\": 11}, {\"word\": \"nnd\", \"year\": \"1867\", \"count\": 11}, {\"word\": \"council\", \"year\": \"1867\", \"count\": 11}, {\"word\": \"council\", \"year\": \"1869\", \"count\": 11}, {\"word\": \"made\", \"year\": \"1857\", \"count\": 11}, {\"word\": \"one\", \"year\": 
\"1943\", \"count\": 11}, {\"word\": \"industry\", \"year\": \"1943\", \"count\": 11}, {\"word\": \"hours\", \"year\": \"1943\", \"count\": 11}, {\"word\": \"inigo\", \"year\": \"1954\", \"count\": 11}, {\"word\": \"messrs\", \"year\": \"1857\", \"count\": 10}, {\"word\": \"good\", \"year\": \"1870\", \"count\": 10}, {\"word\": \"large\", \"year\": \"1857\", \"count\": 10}, {\"word\": \"10s\", \"year\": \"1870\", \"count\": 10}, {\"word\": \"wages\", \"year\": \"1943\", \"count\": 10}, {\"word\": \"one\", \"year\": \"1857\", \"count\": 10}, {\"word\": \"city\", \"year\": \"1867\", \"count\": 9}, {\"word\": \"wethers\", \"year\": \"1870\", \"count\": 9}, {\"word\": \"members\", \"year\": \"1943\", \"count\": 9}, {\"word\": \"work\", \"year\": \"1943\", \"count\": 9}, {\"word\": \"time\", \"year\": \"1943\", \"count\": 9}, {\"word\": \"year\", \"year\": \"1857\", \"count\": 9}, {\"word\": \"cr\", \"year\": \"1867\", \"count\": 8}, {\"word\": \"wragge\", \"year\": \"1987\", \"count\": 8}, {\"word\": \"mr\", \"year\": \"1870\", \"count\": 8}, {\"word\": \"passed\", \"year\": \"1870\", \"count\": 8}, {\"word\": \"two\", \"year\": \"1857\", \"count\": 8}, {\"word\": \"employers\", \"year\": \"1943\", \"count\": 8}, {\"word\": \"committee\", \"year\": \"1867\", \"count\": 7}, {\"word\": \"city\", \"year\": \"1869\", \"count\": 7}, {\"word\": \"prizes\", \"year\": \"1857\", \"count\": 7}, {\"word\": \"iron\", \"year\": \"1857\", \"count\": 7}, {\"word\": \"aid\", \"year\": \"1867\", \"count\": 7}, {\"word\": \"three\", \"year\": \"1857\", \"count\": 7}, {\"word\": \"mayor\", \"year\": \"1869\", \"count\": 6}, {\"word\": \"first\", \"year\": \"1988\", \"count\": 6}, {\"word\": \"mrs\", \"year\": \"1988\", \"count\": 6}, {\"word\": \"motion\", \"year\": \"1867\", \"count\": 6}, {\"word\": \"life\", \"year\": \"1988\", \"count\": 6}, {\"word\": \"committee\", \"year\": \"1869\", \"count\": 6}, {\"word\": \"observatory\", \"year\": \"1987\", \"count\": 5}, {\"word\": 
\"company\", \"year\": \"1869\", \"count\": 5}, {\"word\": \"aid\", \"year\": \"1869\", \"count\": 5}, {\"word\": \"fund\", \"year\": \"1867\", \"count\": 5}, {\"word\": \"town\", \"year\": \"1869\", \"count\": 5}, {\"word\": \"man\", \"year\": \"1988\", \"count\": 5}, {\"word\": \"moved\", \"year\": \"1869\", \"count\": 5}, {\"word\": \"mr\", \"year\": \"1988\", \"count\": 5}, {\"word\": \"time\", \"year\": \"1988\", \"count\": 5}, {\"word\": \"portion\", \"year\": \"1869\", \"count\": 4}, {\"word\": \"tell\", \"year\": \"1988\", \"count\": 4}, {\"word\": \"building\", \"year\": \"1869\", \"count\": 4}, {\"word\": \"dray\", \"year\": \"1988\", \"count\": 4}, {\"word\": \"says\", \"year\": \"1988\", \"count\": 4}, {\"word\": \"years\", \"year\": \"1988\", \"count\": 4}, {\"word\": \"kosciusko\", \"year\": \"1987\", \"count\": 4}, {\"word\": \"later\", \"year\": \"1987\", \"count\": 4}, {\"word\": \"station\", \"year\": \"1987\", \"count\": 4}, {\"word\": \"canberra\", \"year\": \"1987\", \"count\": 3}, {\"word\": \"one\", \"year\": \"1987\", \"count\": 3}, {\"word\": \"made\", \"year\": \"1884\", \"count\": 3}, {\"word\": \"first\", \"year\": \"1987\", \"count\": 3}, {\"word\": \"society\", \"year\": \"1987\", \"count\": 3}, {\"word\": \"mt\", \"year\": \"1987\", \"count\": 3}, {\"word\": \"would\", \"year\": \"1884\", \"count\": 3}, {\"word\": \"one\", \"year\": \"1884\", \"count\": 2}, {\"word\": \"fogarty\", \"year\": \"1884\", \"count\": 2}, {\"word\": \"imported\", \"year\": \"1884\", \"count\": 2}, {\"word\": \"mr\", \"year\": \"1884\", \"count\": 2}, {\"word\": \"morning\", \"year\": \"1884\", \"count\": 2}, {\"word\": \"goulburn\", \"year\": \"1884\", \"count\": 2}, {\"word\": \"tons\", \"year\": \"1884\", \"count\": 2}, {\"word\": \"hospital\", \"year\": \"1884\", \"count\": 2}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.Chart(...)"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "alt.Chart(top_words_by_year).mark_bar().encode(\n",
    "    y=alt.Y(\"word:N\", sort=\"-x\"), x=\"count:Q\", facet=alt.Facet(\"year\", columns=4)\n",
    ").properties(width=120, height=120).resolve_scale(x=\"independent\", y=\"independent\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "## Visualise word frequencies over time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "### Create a faceted chart"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "<style>\n",
       "  #altair-viz-17a5605de3844d8cb1575db3616d4a3e.vega-embed {\n",
       "    width: 100%;\n",
       "    display: flex;\n",
       "  }\n",
       "\n",
       "  #altair-viz-17a5605de3844d8cb1575db3616d4a3e.vega-embed details,\n",
       "  #altair-viz-17a5605de3844d8cb1575db3616d4a3e.vega-embed details summary {\n",
       "    position: relative;\n",
       "  }\n",
       "</style>\n",
       "<div id=\"altair-viz-17a5605de3844d8cb1575db3616d4a3e\"></div>\n",
       "<script type=\"text/javascript\">\n",
       "  var VEGA_DEBUG = (typeof VEGA_DEBUG == \"undefined\") ? {} : VEGA_DEBUG;\n",
       "  (function(spec, embedOpt){\n",
       "    let outputDiv = document.currentScript.previousElementSibling;\n",
       "    if (outputDiv.id !== \"altair-viz-17a5605de3844d8cb1575db3616d4a3e\") {\n",
       "      outputDiv = document.getElementById(\"altair-viz-17a5605de3844d8cb1575db3616d4a3e\");\n",
       "    }\n",
       "    const paths = {\n",
       "      \"vega\": \"https://cdn.jsdelivr.net/npm/vega@5?noext\",\n",
       "      \"vega-lib\": \"https://cdn.jsdelivr.net/npm/vega-lib?noext\",\n",
       "      \"vega-lite\": \"https://cdn.jsdelivr.net/npm/vega-lite@5.14.1?noext\",\n",
       "      \"vega-embed\": \"https://cdn.jsdelivr.net/npm/vega-embed@6?noext\",\n",
       "    };\n",
       "\n",
       "    function maybeLoadScript(lib, version) {\n",
       "      var key = `${lib.replace(\"-\", \"\")}_version`;\n",
       "      return (VEGA_DEBUG[key] == version) ?\n",
       "        Promise.resolve(paths[lib]) :\n",
       "        new Promise(function(resolve, reject) {\n",
       "          var s = document.createElement('script');\n",
       "          document.getElementsByTagName(\"head\")[0].appendChild(s);\n",
       "          s.async = true;\n",
       "          s.onload = () => {\n",
       "            VEGA_DEBUG[key] = version;\n",
       "            return resolve(paths[lib]);\n",
       "          };\n",
       "          s.onerror = () => reject(`Error loading script: ${paths[lib]}`);\n",
       "          s.src = paths[lib];\n",
       "        });\n",
       "    }\n",
       "\n",
       "    function showError(err) {\n",
       "      outputDiv.innerHTML = `<div class=\"error\" style=\"color:red;\">${err}</div>`;\n",
       "      throw err;\n",
       "    }\n",
       "\n",
       "    function displayChart(vegaEmbed) {\n",
       "      vegaEmbed(outputDiv, spec, embedOpt)\n",
       "        .catch(err => showError(`Javascript Error: ${err.message}<br>This usually means there's a typo in your chart specification. See the javascript console for the full traceback.`));\n",
       "    }\n",
       "\n",
       "    if(typeof define === \"function\" && define.amd) {\n",
       "      requirejs.config({paths});\n",
       "      require([\"vega-embed\"], displayChart, err => showError(`Error loading script: ${err.message}`));\n",
       "    } else {\n",
       "      maybeLoadScript(\"vega\", \"5\")\n",
       "        .then(() => maybeLoadScript(\"vega-lite\", \"5.14.1\"))\n",
       "        .then(() => maybeLoadScript(\"vega-embed\", \"6\"))\n",
       "        .catch(showError)\n",
       "        .then(() => displayChart(vegaEmbed));\n",
       "    }\n",
       "  })({\"config\": {\"view\": {\"continuousWidth\": 300, \"continuousHeight\": 300}}, \"data\": {\"name\": \"data-6678afb6a29210b1f1440470edf43ecf\"}, \"mark\": {\"type\": \"line\"}, \"encoding\": {\"color\": {\"field\": \"word\", \"type\": \"nominal\"}, \"facet\": {\"columns\": 1, \"field\": \"word\", \"type\": \"nominal\"}, \"x\": {\"axis\": {\"format\": \"c\", \"title\": \"Year\"}, \"field\": \"year\", \"type\": \"quantitative\"}, \"y\": {\"field\": \"count\", \"type\": \"quantitative\"}}, \"height\": 100, \"resolve\": {\"scale\": {\"y\": \"independent\"}}, \"width\": 700, \"$schema\": \"https://vega.github.io/schema/vega-lite/v5.14.1.json\", \"datasets\": {\"data-6678afb6a29210b1f1440470edf43ecf\": [{\"word\": \"snow\", \"year\": \"1854\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1855\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1856\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1857\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1858\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1859\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1864\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1865\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1866\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1867\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1868\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1869\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1870\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1871\", \"count\": 3}, {\"word\": \"snow\", \"year\": \"1872\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1876\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1881\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1882\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1884\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1885\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1889\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1890\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1891\", 
\"count\": 0}, {\"word\": \"snow\", \"year\": \"1892\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1893\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1894\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1895\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1896\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1897\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1898\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1899\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1900\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1901\", \"count\": 5}, {\"word\": \"snow\", \"year\": \"1902\", \"count\": 90}, {\"word\": \"snow\", \"year\": \"1903\", \"count\": 38}, {\"word\": \"snow\", \"year\": \"1904\", \"count\": 16}, {\"word\": \"snow\", \"year\": \"1905\", \"count\": 3}, {\"word\": \"snow\", \"year\": \"1906\", \"count\": 6}, {\"word\": \"snow\", \"year\": \"1907\", \"count\": 15}, {\"word\": \"snow\", \"year\": \"1908\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1909\", \"count\": 1}, {\"word\": \"snow\", \"year\": \"1910\", \"count\": 3}, {\"word\": \"snow\", \"year\": \"1911\", \"count\": 9}, {\"word\": \"snow\", \"year\": \"1912\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1913\", \"count\": 3}, {\"word\": \"snow\", \"year\": \"1914\", \"count\": 1}, {\"word\": \"snow\", \"year\": \"1915\", \"count\": 5}, {\"word\": \"snow\", \"year\": \"1916\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1917\", \"count\": 1}, {\"word\": \"snow\", \"year\": \"1918\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1919\", \"count\": 5}, {\"word\": \"snow\", \"year\": \"1920\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1921\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1922\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1923\", \"count\": 4}, {\"word\": \"snow\", \"year\": \"1924\", \"count\": 7}, {\"word\": \"snow\", \"year\": \"1925\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1926\", \"count\": 0}, {\"word\": \"snow\", 
\"year\": \"1927\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1928\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1929\", \"count\": 9}, {\"word\": \"snow\", \"year\": \"1930\", \"count\": 5}, {\"word\": \"snow\", \"year\": \"1931\", \"count\": 4}, {\"word\": \"snow\", \"year\": \"1932\", \"count\": 6}, {\"word\": \"snow\", \"year\": \"1933\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1934\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1935\", \"count\": 3}, {\"word\": \"snow\", \"year\": \"1936\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1937\", \"count\": 1}, {\"word\": \"snow\", \"year\": \"1938\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1939\", \"count\": 1}, {\"word\": \"snow\", \"year\": \"1940\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1941\", \"count\": 2}, {\"word\": \"snow\", \"year\": \"1942\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1943\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1944\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1945\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1946\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1947\", \"count\": 1}, {\"word\": \"snow\", \"year\": \"1948\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1949\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1950\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1951\", \"count\": 6}, {\"word\": \"snow\", \"year\": \"1952\", \"count\": 3}, {\"word\": \"snow\", \"year\": \"1953\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1954\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1987\", \"count\": 0}, {\"word\": \"snow\", \"year\": \"1988\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1854\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1855\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1856\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1857\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1858\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1859\", \"count\": 
0}, {\"word\": \"storm\", \"year\": \"1864\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1865\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1866\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1867\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1868\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1869\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1870\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1871\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1872\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1876\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1881\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1882\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1884\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1885\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1889\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1890\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1891\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1892\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1893\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1894\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1895\", \"count\": 2}, {\"word\": \"storm\", \"year\": \"1896\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1897\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1898\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1899\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1900\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1901\", \"count\": 14}, {\"word\": \"storm\", \"year\": \"1902\", \"count\": 162}, {\"word\": \"storm\", \"year\": \"1903\", \"count\": 51}, {\"word\": \"storm\", \"year\": \"1904\", \"count\": 11}, {\"word\": \"storm\", \"year\": \"1905\", \"count\": 9}, {\"word\": \"storm\", \"year\": \"1906\", \"count\": 4}, {\"word\": \"storm\", \"year\": \"1907\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1908\", \"count\": 5}, {\"word\": \"storm\", \"year\": \"1909\", \"count\": 
2}, {\"word\": \"storm\", \"year\": \"1910\", \"count\": 5}, {\"word\": \"storm\", \"year\": \"1911\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1912\", \"count\": 3}, {\"word\": \"storm\", \"year\": \"1913\", \"count\": 3}, {\"word\": \"storm\", \"year\": \"1914\", \"count\": 11}, {\"word\": \"storm\", \"year\": \"1915\", \"count\": 6}, {\"word\": \"storm\", \"year\": \"1916\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1917\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1918\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1919\", \"count\": 2}, {\"word\": \"storm\", \"year\": \"1920\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1921\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1922\", \"count\": 6}, {\"word\": \"storm\", \"year\": \"1923\", \"count\": 5}, {\"word\": \"storm\", \"year\": \"1924\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1925\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1926\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1927\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1928\", \"count\": 10}, {\"word\": \"storm\", \"year\": \"1929\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1930\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1931\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1932\", \"count\": 4}, {\"word\": \"storm\", \"year\": \"1933\", \"count\": 4}, {\"word\": \"storm\", \"year\": \"1934\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1935\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1936\", \"count\": 25}, {\"word\": \"storm\", \"year\": \"1937\", \"count\": 10}, {\"word\": \"storm\", \"year\": \"1938\", \"count\": 2}, {\"word\": \"storm\", \"year\": \"1939\", \"count\": 7}, {\"word\": \"storm\", \"year\": \"1940\", \"count\": 9}, {\"word\": \"storm\", \"year\": \"1941\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1942\", \"count\": 2}, {\"word\": \"storm\", \"year\": \"1943\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1944\", \"count\": 
0}, {\"word\": \"storm\", \"year\": \"1945\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1946\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1947\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1948\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1949\", \"count\": 1}, {\"word\": \"storm\", \"year\": \"1950\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1951\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1952\", \"count\": 4}, {\"word\": \"storm\", \"year\": \"1953\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1954\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1987\", \"count\": 0}, {\"word\": \"storm\", \"year\": \"1988\", \"count\": 0}]}}, {\"mode\": \"vega-lite\"});\n",
       "</script>"
      ],
      "text/plain": [
       "alt.Chart(...)"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "alt.Chart(\n",
    "    words_by_year.loc[words_by_year[\"word\"].isin([\"storm\", \"cyclone\", \"snow\"])]\n",
    ").mark_line().encode(\n",
    "    x=alt.X(\"year:Q\", axis=alt.Axis(format=\"c\", title=\"Year\")),\n",
    "    y=\"count:Q\",\n",
    "    color=\"word:N\",\n",
    "    facet=alt.Facet(\"word:N\", columns=1),\n",
    ").properties(\n",
    "    width=700, height=100\n",
    ").resolve_scale(\n",
    "    y=\"independent\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "editable": true,
    "slideshow": {
     "slide_type": ""
    },
    "tags": []
   },
   "source": [
    "----\n",
    "\n",
    "Created by [Tim Sherratt](https://timsherratt.org) ([@wragge](https://twitter.com/wragge)) for the [GLAM Workbench](https://github.com/glam-workbench/).  \n",
    "Support this project by [becoming a GitHub sponsor](https://github.com/sponsors/wragge?o=esb).\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "rocrate": {
   "author": [
    {
     "name": "Sherratt, Tim",
     "orcid": "https://orcid.org/0000-0001-7956-4498"
    }
   ],
   "category": "exploring",
   "description": "This notebook suggests some ways in which you can aggregate and analyse the individual OCRd text files for each article — look at word frequencies ; calculate TF-IDF values. (Under construction)",
   "mainEntityOfPage": "https://glam-workbench.net/trove-harvester/exploring-troveharvester-text/",
   "name": "Explore harvested text files",
   "position": 3
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}