{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyse a series" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "import pandas as pd\n", "from IPython.display import Image as DImage\n", "from IPython.core.display import display, HTML\n", "import series_details\n", "\n", "# Plotly helps us make pretty charts\n", "import plotly.offline as py\n", "import plotly.graph_objs as go\n", "\n", "# This lets Plotly draw charts in cells\n", "py.init_notebook_mode()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This notebook is for analysing a series that you've already harvested. If you haven't harvested any data yet, then you need to go back to the ['Harvesting a series' notebook](Harvesting series.ipynb)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# What series do you want to analyse?\n", "# Insert the series id between the quotes.\n", "series = 'B13'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Load the CSV data for the specified series into a dataframe. Parse the dates as dates!\n", "df = pd.read_csv(os.path.join('data', '{}.csv'.format(series.replace('/', '-'))), parse_dates=['start_date', 'end_date'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get some summary data\n", "\n", "We're going to create a simple summary of some of the main characteristics of the series, as reflected in the harvested files." 
] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# We're going to assemble some summary data about the series in a 'summary' dictionary\n", "# Let's create the dictionary and add the series identifier\n", "summary = {'series': series}" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "20194\n" ] } ], "source": [ "# The 'shape' property returns the number of rows and columns. So 'shape[0]' gives us the number of items harvested.\n", "summary['total_items'] = df.shape[0]\n", "print(summary['total_items'])" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'Open': 19786, 'Not yet examined': 400, 'Open with exception': 8}\n" ] } ], "source": [ "# Get the frequency of the different access status categories\n", "summary['access_counts'] = df['access_status'].value_counts().to_dict()\n", "print(summary['access_counts'])" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "354\n" ] } ], "source": [ "# Get the number of files that have been digitised\n", "summary['digitised_files'] = len(df.loc[df['digitised_status'] == True])\n", "print(summary['digitised_files'])" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "5043\n" ] } ], "source": [ "# Get the number of individual pages that have been digitised\n", "summary['digitised_pages'] = df['digitised_pages'].sum()\n", "print(summary['digitised_pages'])" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1800\n" ] } ], "source": [ "# Get the earliest start date\n", "summary['date_from'] = df['start_date'].min().year\n", "print(summary['date_from'])" ] }, { 
"cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2005\n" ] } ], "source": [ "# Get the latest end date\n", "summary['date_to'] = df['end_date'].max().year\n", "print(summary['date_to'])" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SERIES: B13\n", "Number of items: 20,194\n", "Access status:\n", " Open: 19,786\n", " Not yet examined: 400\n", " Open with exception: 8\n", "Contents dates: 1800 to 2005\n", "Digitised files: 354\n", "Digitised pages: 5,043\n" ] } ], "source": [ "# Let's display all the summary data\n", "print('SERIES: {}'.format(summary['series']))\n", "print('Number of items: {:,}'.format(summary['total_items']))\n", "print('Access status:')\n", "for status, total in summary['access_counts'].items():\n", " print(' {}: {:,}'.format(status, total))\n", "print('Contents dates: {} to {}'.format(summary['date_from'], summary['date_to']))\n", "print('Digitised files: {:,}'.format(summary['digitised_files']))\n", "print('Digitised pages: {:,}'.format(summary['digitised_pages']))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that a slightly enhanced version of the code above is available in the `series_details` module that you can import into any notebook. So to create a summary of a series you can just:" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "

NAA Series B13

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

General and classified correspondence, annual single number series

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Total items20,194
Access status
Open19,786 (97.98%)
Not yet examined400 (1.98%)
Open with exception8 (0.04%)
Number of items digitised354 (1.75%)
Number of pages digitised5,043
Date of earliest content1800
Date of latest content2005
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import the module\n", "import series_details\n", "\n", "# Call display_summary() providing the series name and the dataframe\n", "series_details.display_summary(series, df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plot the contents dates\n", "\n", "Plotting the dates is a bit tricky. Each file can have both a start date and an end date. So if we want to plot the years covered by a file, we need to include all the years between the start and end dates. Also, dates can be recorded at different levels of granularity, from specific days to just years. And sometimes there are no end dates recorded at all – what does this mean?\n", "\n", "The code in the cell below does a few things:\n", "\n", "* It fills any empty end dates with the start date from the same item. This probably means some content years will be missed, but it's the only date we can be certain of.\n", "* It loops through all the rows in the dataframe, then for each row it extracts the years between the start and end date. Currently this looks to see if 1 January is covered by the date range, so if there's an exact start date after 1 January I don't think it will be captured. 
I need to investigate this further.\n", "* It combines all of the years into one big series and then totals up the frquency of each year.\n", "\n", "I'm sure this is not perfect, but it seems to produce useful results.\n" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "# Fill any blank end dates with start dates\n", "df['end_date'] = df[['end_date']].apply(lambda x: x.fillna(value=df['start_date']))\n", "\n", "# This is a bit tricky.\n", "# For each item we want to find the years that it has content from -- ie start_year <= year <= end_year.\n", "# Then we want to put all the years from all the items together and look at their frequency\n", "years = pd.concat([pd.date_range(\n", " start=row.start_date, \n", " end=row.end_date, \n", " freq='AS').year.to_series() for row in df.itertuples(index=False)]).value_counts()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "# Put the resulting series in a dataframe so it looks pretty.\n", "year_totals = pd.DataFrame(years)\n", "\n", "# Sort results by year\n", "year_totals.sort_index(inplace=True)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
18001
18982
18992
19003
19014
190234
190320
190412
190513
190616
190717
190819
190933
191047
191150
1912113
1913106
191496
1915130
191691
191786
191888
1919113
1920136
1921148
1922777
1923920
19241,044
19251,154
19261,498
19271,431
19281,328
19291,213
19301,128
1931998
1932848
1933939
1934949
1935973
19361,010
19371,076
1938155
1939981
19401,572
1941274
1942211
1943162
1944174
1945175
1946251
1947117
1948121
1949134
1950146
1951157
1952166
1953180
1954194
1955200
1956221
1957238
1958251
1959266
1960270
1961273
1962278
1963280
1964280
1965285
1966286
1967289
1968288
1969292
1970296
1971296
1972280
1973275
1974255
1975244
1976225
1977205
1978187
1979172
1980171
1981151
1982134
1983122
1984116
198592
198676
198762
198853
198941
199033
199123
199220
199322
199419
199524
199614
199710
19987
19996
20004
20013
20022
20031
20041
20051
" ], "text/plain": [ "" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Display the results\n", "year_totals.style.format({0: '{:,}'})" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "type": "bar", "x": [ 1800, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 ], "y": [ 1, 2, 2, 3, 4, 34, 20, 12, 13, 16, 17, 19, 33, 47, 50, 113, 106, 96, 130, 91, 86, 88, 113, 136, 148, 777, 920, 1044, 1154, 1498, 1431, 1328, 1213, 1128, 998, 848, 939, 949, 973, 1010, 1076, 155, 981, 1572, 274, 211, 162, 174, 175, 251, 117, 121, 134, 146, 157, 166, 180, 194, 200, 221, 238, 251, 266, 270, 273, 278, 280, 280, 285, 286, 289, 288, 292, 296, 296, 280, 275, 255, 244, 225, 205, 187, 172, 171, 151, 134, 122, 116, 92, 76, 62, 53, 41, 33, 23, 20, 22, 19, 24, 14, 10, 7, 6, 4, 3, 2, 1, 1, 1 ] } ], "layout": { "title": "Content dates", "xaxis": { "title": "Year" }, "yaxis": { "title": "Number of items" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Let's graph the frequency of content years\n", "plotly_data = [go.Bar(\n", " x=year_totals.index.values, # The years are the index\n", " y=year_totals[0]\n", " )]\n", "\n", "# Add some labels\n", "layout = go.Layout(\n", " title='Content dates',\n", " xaxis=dict(\n", " title='Year'\n", " ),\n", " yaxis=dict(\n", " title='Number of items'\n", " )\n", ")\n", "\n", "# Create a chart \n", "fig = go.Figure(data=plotly_data, layout=layout)\n", "py.iplot(fig, filename='series-dates-bar')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that a slightly enhanced version of the code above is available in the `series_details` module that you can import into any notebook. So to plot the content dates of a series you can just:" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "name": "Digitised", "type": "bar", "x": [ 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972 ], "y": [ 1, 1, 1, 3, 2, 7, 6, 6, 12, 9, 7, 15, 11, 12, 11, 29, 19, 21, 25, 28, 30, 26, 31, 22, 31, 19, 21, 24, 30, 19, 17, 4, 5, 9, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1 ] }, { "name": "Not digitised", "type": "bar", "x": [ 1800, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 
1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 ], "y": [ 1, 2, 2, 3, 4, 34, 20, 12, 13, 16, 16, 18, 32, 44, 48, 106, 100, 90, 118, 82, 79, 73, 102, 124, 137, 748, 901, 1023, 1129, 1470, 1401, 1302, 1182, 1106, 967, 829, 918, 925, 943, 991, 1059, 151, 976, 1563, 272, 209, 160, 172, 172, 249, 116, 120, 133, 145, 156, 165, 179, 193, 198, 218, 235, 248, 263, 267, 270, 275, 277, 277, 284, 285, 288, 287, 291, 295, 295, 279, 275, 255, 244, 225, 205, 187, 172, 171, 151, 134, 122, 116, 92, 76, 62, 53, 41, 33, 23, 20, 22, 19, 24, 14, 10, 7, 6, 4, 3, 2, 1, 1, 1 ] } ], "layout": { "barmode": "stack", "title": "Content dates", "xaxis": { "title": "Year" }, "yaxis": { "title": "Number of items" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import the module\n", "import series_details\n", "\n", "# Call plot_dates() providing the dataframe\n", "series_details.plot_dates(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Filter by words in file titles" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'df' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0msearch_term\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'wife'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mdf_filtered\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'title'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontains\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msearch_term\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcase\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mdf_filtered\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined" ] } ], "source": [ "# Find titles containing a particular phrase -- in this case 'wife'\n", "# This creates a new dataframe called 'df_filtered'\n", "# Try changing this to filter for other words\n", "\n", "search_term = 'wife'\n", "df_filtered = 
df.loc[df['title'].str.contains(search_term, case=False)].copy()\n", "df_filtered" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "name": "Digitised", "type": "bar", "x": [ 1922, 1923, 1925, 1928, 1929, 1930, 1931, 1932, 1935, 1936, 1937 ], "y": [ 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1 ] }, { "name": "Not digitised", "type": "bar", "x": [ 1902, 1903, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937 ], "y": [ 1, 1, 1, 1, 2, 3, 3, 3, 3, 6, 13, 47, 28, 26, 14, 15, 24, 23, 15, 23, 34, 29, 41, 32, 29 ] } ], "layout": { "barmode": "stack", "title": "Content dates", "xaxis": { "title": "Year" }, "yaxis": { "title": "Number of items" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# We can plot this filtered dataframe just like the series\n", "series_details.plot_dates(df_filtered)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "# Save the new dataframe as a csv\n", "df_filtered.to_csv(os.path.join('data', '{}-{}.csv'.format(series.replace('/', '-'), search_term)))" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifierseriescontrol_symboltitlecontents_datesstart_dateend_dateaccess_statuslocationdigitised_statusdigitised_pages
494406808B131912/11171Application for Exemption Certificate, Ah Fan1912 - 19121912-01-011912-01-01OpenMelbourneFalse0
496406815B131912/6389Application for Exemption Certificate, Charlie...1912 - 19121912-01-011912-01-01OpenMelbourneFalse0
499406824B131912/10341Application for Exemption Certificate, Ah Yow1912 - 19121912-01-011912-01-01OpenMelbourneFalse0
501406830B131912/4220Application for Exemption Certificate, Ah Wah1912 - 19121912-01-011912-01-01OpenMelbourneFalse0
506406850B131912/450Application for Exemption Certificate, Ah Get1911 - 19121911-01-011912-01-01OpenMelbourneFalse0
508406857B131911/21298Application for Certificates of Exemption, Ah ...1911 - 19131911-01-011913-01-01OpenMelbourneFalse0
509406860B131911/21305Application for Certificate of Exemption, Ah C...1911 - 19131911-01-011913-01-01OpenMelbourneFalse0
511406868B131912/433Cutting from \"Sunday Times\" W.A. re Lee Keong ...1912 - 19121912-01-011912-01-01OpenMelbourneFalse0
513406880B131911/13173Application for Exemption Certificate, Ah Goune1911 - 19131911-01-011913-01-01OpenMelbourneFalse0
514406883B131911/14554Prohibited Immigrant, Herbert Ah Loy1911 - 19111911-01-011911-01-01OpenMelbourneFalse0
516406893B131910/6136Application for Exemption Certificate, George ...1910 - 19121910-01-011912-01-01OpenMelbourneFalse0
519406910B131911/4400Application for Exemption Certificate, Ah Cheong1911 - 19121911-01-011912-01-01OpenMelbourneFalse0
520406915B131911/11444Application for Exemption Certificate, Ah Hee1911 - 19141911-01-011914-01-01OpenMelbourneFalse0
527406943B131903/7130Admittance of Chinese to Commonwealth on prese...1903 - 19031903-01-011903-01-01OpenMelbourneFalse0
528406947B131903/9491(Local) Chinese, being permitted to go onboard...1903 - 19031903-01-011903-01-01OpenMelbourneFalse0
530407082B131902/4295Query- is wife of Chinese who has been a resid...1902 - 19021902-01-011902-01-01OpenMelbourneFalse0
534407104B131902/547Chinese coming from another State. Will they b...1902 - 19021902-01-011902-01-01OpenMelbourneFalse0
535407121B131902/713Immigration Restriction Act to be applied in p...1902 - 19021902-01-011902-01-01OpenMelbourneFalse0
536407127B131902/1166Care to be exercised in admission of Chinese o...1902 - 19021902-01-011902-01-01OpenMelbourneFalse0
537407133B131902/2793Opinion of Attorney General as to whether a Ch...1902 - 19021902-01-011902-01-01OpenMelbourneFalse0
540407154B131902/464Immigration Restriction Act. Re Chinese passin...1902 - 19021902-01-011902-01-01OpenMelbourneFalse0
541407161B131902/487Re Chinese (4) on board the \"Clitus\". Are 2 Sp...1902 - 19021902-01-011902-01-01OpenMelbourneFalse0
542407168B131902/3720Re Certificate of Domicile for Ah Poy. Asks if...1902 - 19021902-01-011902-01-01OpenMelbourneFalse0
544407185B131919/5628Quan Ah Sam - Refused Certificate for Exemptio...1919 - 19191919-01-011919-01-01OpenMelbourneFalse0
548407212B131905/5371Appeal of certain Chinese against conviction a...1905 - 19051905-01-011905-01-01OpenMelbourneFalse0
549407220B131908/14693Attempts made to effect substitution of other ...1908 - 19081908-01-011908-01-01OpenMelbourneFalse0
557407277B131909/11714Illicit entry of Chinese presenting Naturaliza...1909 - 19091909-01-011909-01-01OpenMelbourneFalse0
558407283B131909/16634Ah Bing applies for Certificate under Section ...1909 - 19091909-01-011909-01-01OpenMelbourneFalse0
562407303B131909/3855Family & Staff of Mr. Liang Lan-Hsun Chinese C...1909 - 19091909-01-011909-01-01OpenMelbourneFalse0
567407326B131908/8431Ah Woo appln for Cert.1906 - 19061906-01-011906-01-01OpenMelbourneFalse0
....................................
198275945296B131923/24954Ah Yuck - applies for a Certificate of Exempti...1923 - 19231923-01-011923-01-01OpenMelbourneFalse0
198295945298B131924/2065Chinese passengers per SS ARAFURA permitted to...1924 - 19241924-01-011924-01-01OpenMelbourneFalse0
198365945305B131924/14752Ah Quon, Application for Certificate of Exempt...1924 - 19241924-01-011924-01-01OpenMelbourneFalse0
198425945400B131926/8111George Lum, Ah Fang, Ah Hing - Victorian Certi...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
198435945401B131926/8940Ah Dow Certificate of Exemption from Dictation...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
198565945414B131926/25647Chinese passengers arriving at Melbourne on 16...1926 - 19271926-01-011927-01-01OpenMelbourneFalse0
198635945421B131926/11270Ah Din application for Certificate of Exemptio...1925 - 19301925-01-011930-01-01OpenMelbourneFalse0
199135953257B131932/8267Exemption from Dictation Test of Chinese perso...1932 - 19321932-01-011932-01-01OpenMelbourneFalse0
199175953261B131932/6671Kee Sik Kwai and Chow Mow Pun - Chinese ex SS ...1932 - 19321932-01-011932-01-01OpenMelbourneFalse0
199546045355B131936/11020Chin Wat, Lam Kee, Louey Doon, Chinese passeng...1936 - 19361936-01-011936-01-01OpenMelbourneFalse0
199866551572B131925/27508Ah Kong Application for a Certificate of Exemp...1925 - 19271925-01-011927-01-01OpenMelbourneFalse0
199906551576B131925/14137Ah Goon and Mah Wah arrival in Melbourne per S...1925 - 19251925-01-011925-01-01OpenMelbourneFalse0
199956551581B131925/24262Rejected application by Mr Ah On from the firm...1925 - 19261925-01-011926-01-01OpenMelbourneFalse0
200436553344B131933/14364Chia Tak Eng, Chinese member of crew of s.s. \"...1933 - 19331933-01-011933-01-01OpenMelbourneFalse0
200616553362B131937/16107Ah Wing - application for C.E.D.T. (Certificat...1926 - 19401926-01-011940-01-01OpenMelbourneFalse0
2009510311991B131933/24224Chin Ah Leong aka Willie C Long applies for Ce...1927 - 19331927-01-011933-01-01OpenMelbourneFalse0
2011510535295B131933/23067Ah Wee [Ah Way] - Apllication for CEDT1898 - 19461898-01-011946-01-01OpenMelbourneFalse0
2011710538200B131933/24225Ah Joe applies for Certificate Exempting from ...1926 - 19331926-01-011933-01-01OpenMelbourneFalse0
2012310559103B131911/2239Ah Jack - Application for Certificate of Exemp...1910 - 19161910-01-011916-01-01OpenMelbourneFalse0
2012711979730B131912/12664Poon Ah Soo - Certificate of Exemption from Di...1912 - 19121912-01-011912-01-01OpenMelbourneFalse0
2013011993962B131932/5459Arrival of Chinese passengers ex S.S. Taiping1932 - 19321932-01-011932-01-01OpenMelbourneFalse0
2015230762921B131916/6339Ah Lock, Application for C.E.D.T [includes pho...1916 - 19161916-01-011916-01-01OpenMelbourneFalse0
2015830762927B131916/7813Ah Chow, Application for C.E.D.T [includes pho...1916 - 19161916-01-011916-01-01OpenMelbourneFalse0
2015930762929B131922/9340Collector of Customs - Melbourne - Five Chines...1922 - 19221922-01-011922-01-01OpenMelbourneFalse0
2016030762930B131922/9690C.E.D.T Book 259 Number 72 relating to Chinese...1922 - 19221922-01-011922-01-01OpenMelbourneFalse0
2016130762931B131913/484Ah Louey; Application for C.E.D.T [includes p...1913 - 19141913-01-011914-01-01OpenMelbourneFalse0
2016630762942B131924/8009Ah hing and Tang Cheong; Deserters from the S....1924 - 19241924-01-011924-01-01OpenMelbourneFalse0
2017930762957B131926/24036Ah Hing - C.E.D.T in favour, leaving port Melb...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2018230762960B131926/26816Ah Jick - Application for C.E.D.T1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2018530762974B131929/22619Ah Jim; Application for C.E.D.T1929 - 19391929-01-011939-01-01OpenMelbourneFalse0
\n", "

2457 rows × 11 columns

\n", "
" ], "text/plain": [ " identifier series control_symbol \\\n", "494 406808 B13 1912/11171 \n", "496 406815 B13 1912/6389 \n", "499 406824 B13 1912/10341 \n", "501 406830 B13 1912/4220 \n", "506 406850 B13 1912/450 \n", "508 406857 B13 1911/21298 \n", "509 406860 B13 1911/21305 \n", "511 406868 B13 1912/433 \n", "513 406880 B13 1911/13173 \n", "514 406883 B13 1911/14554 \n", "516 406893 B13 1910/6136 \n", "519 406910 B13 1911/4400 \n", "520 406915 B13 1911/11444 \n", "527 406943 B13 1903/7130 \n", "528 406947 B13 1903/9491 \n", "530 407082 B13 1902/4295 \n", "534 407104 B13 1902/547 \n", "535 407121 B13 1902/713 \n", "536 407127 B13 1902/1166 \n", "537 407133 B13 1902/2793 \n", "540 407154 B13 1902/464 \n", "541 407161 B13 1902/487 \n", "542 407168 B13 1902/3720 \n", "544 407185 B13 1919/5628 \n", "548 407212 B13 1905/5371 \n", "549 407220 B13 1908/14693 \n", "557 407277 B13 1909/11714 \n", "558 407283 B13 1909/16634 \n", "562 407303 B13 1909/3855 \n", "567 407326 B13 1908/8431 \n", "... ... ... ... 
\n", "19827 5945296 B13 1923/24954 \n", "19829 5945298 B13 1924/2065 \n", "19836 5945305 B13 1924/14752 \n", "19842 5945400 B13 1926/8111 \n", "19843 5945401 B13 1926/8940 \n", "19856 5945414 B13 1926/25647 \n", "19863 5945421 B13 1926/11270 \n", "19913 5953257 B13 1932/8267 \n", "19917 5953261 B13 1932/6671 \n", "19954 6045355 B13 1936/11020 \n", "19986 6551572 B13 1925/27508 \n", "19990 6551576 B13 1925/14137 \n", "19995 6551581 B13 1925/24262 \n", "20043 6553344 B13 1933/14364 \n", "20061 6553362 B13 1937/16107 \n", "20095 10311991 B13 1933/24224 \n", "20115 10535295 B13 1933/23067 \n", "20117 10538200 B13 1933/24225 \n", "20123 10559103 B13 1911/2239 \n", "20127 11979730 B13 1912/12664 \n", "20130 11993962 B13 1932/5459 \n", "20152 30762921 B13 1916/6339 \n", "20158 30762927 B13 1916/7813 \n", "20159 30762929 B13 1922/9340 \n", "20160 30762930 B13 1922/9690 \n", "20161 30762931 B13 1913/484 \n", "20166 30762942 B13 1924/8009 \n", "20179 30762957 B13 1926/24036 \n", "20182 30762960 B13 1926/26816 \n", "20185 30762974 B13 1929/22619 \n", "\n", " title contents_dates \\\n", "494 Application for Exemption Certificate, Ah Fan 1912 - 1912 \n", "496 Application for Exemption Certificate, Charlie... 1912 - 1912 \n", "499 Application for Exemption Certificate, Ah Yow 1912 - 1912 \n", "501 Application for Exemption Certificate, Ah Wah 1912 - 1912 \n", "506 Application for Exemption Certificate, Ah Get 1911 - 1912 \n", "508 Application for Certificates of Exemption, Ah ... 1911 - 1913 \n", "509 Application for Certificate of Exemption, Ah C... 1911 - 1913 \n", "511 Cutting from \"Sunday Times\" W.A. re Lee Keong ... 1912 - 1912 \n", "513 Application for Exemption Certificate, Ah Goune 1911 - 1913 \n", "514 Prohibited Immigrant, Herbert Ah Loy 1911 - 1911 \n", "516 Application for Exemption Certificate, George ... 
1910 - 1912 \n", "519 Application for Exemption Certificate, Ah Cheong 1911 - 1912 \n", "520 Application for Exemption Certificate, Ah Hee 1911 - 1914 \n", "527 Admittance of Chinese to Commonwealth on prese... 1903 - 1903 \n", "528 (Local) Chinese, being permitted to go onboard... 1903 - 1903 \n", "530 Query- is wife of Chinese who has been a resid... 1902 - 1902 \n", "534 Chinese coming from another State. Will they b... 1902 - 1902 \n", "535 Immigration Restriction Act to be applied in p... 1902 - 1902 \n", "536 Care to be exercised in admission of Chinese o... 1902 - 1902 \n", "537 Opinion of Attorney General as to whether a Ch... 1902 - 1902 \n", "540 Immigration Restriction Act. Re Chinese passin... 1902 - 1902 \n", "541 Re Chinese (4) on board the \"Clitus\". Are 2 Sp... 1902 - 1902 \n", "542 Re Certificate of Domicile for Ah Poy. Asks if... 1902 - 1902 \n", "544 Quan Ah Sam - Refused Certificate for Exemptio... 1919 - 1919 \n", "548 Appeal of certain Chinese against conviction a... 1905 - 1905 \n", "549 Attempts made to effect substitution of other ... 1908 - 1908 \n", "557 Illicit entry of Chinese presenting Naturaliza... 1909 - 1909 \n", "558 Ah Bing applies for Certificate under Section ... 1909 - 1909 \n", "562 Family & Staff of Mr. Liang Lan-Hsun Chinese C... 1909 - 1909 \n", "567 Ah Woo appln for Cert. 1906 - 1906 \n", "... ... ... \n", "19827 Ah Yuck - applies for a Certificate of Exempti... 1923 - 1923 \n", "19829 Chinese passengers per SS ARAFURA permitted to... 1924 - 1924 \n", "19836 Ah Quon, Application for Certificate of Exempt... 1924 - 1924 \n", "19842 George Lum, Ah Fang, Ah Hing - Victorian Certi... 1926 - 1926 \n", "19843 Ah Dow Certificate of Exemption from Dictation... 1926 - 1926 \n", "19856 Chinese passengers arriving at Melbourne on 16... 1926 - 1927 \n", "19863 Ah Din application for Certificate of Exemptio... 1925 - 1930 \n", "19913 Exemption from Dictation Test of Chinese perso... 
1932 - 1932 \n", "19917 Kee Sik Kwai and Chow Mow Pun - Chinese ex SS ... 1932 - 1932 \n", "19954 Chin Wat, Lam Kee, Louey Doon, Chinese passeng... 1936 - 1936 \n", "19986 Ah Kong Application for a Certificate of Exemp... 1925 - 1927 \n", "19990 Ah Goon and Mah Wah arrival in Melbourne per S... 1925 - 1925 \n", "19995 Rejected application by Mr Ah On from the firm... 1925 - 1926 \n", "20043 Chia Tak Eng, Chinese member of crew of s.s. \"... 1933 - 1933 \n", "20061 Ah Wing - application for C.E.D.T. (Certificat... 1926 - 1940 \n", "20095 Chin Ah Leong aka Willie C Long applies for Ce... 1927 - 1933 \n", "20115 Ah Wee [Ah Way] - Apllication for CEDT 1898 - 1946 \n", "20117 Ah Joe applies for Certificate Exempting from ... 1926 - 1933 \n", "20123 Ah Jack - Application for Certificate of Exemp... 1910 - 1916 \n", "20127 Poon Ah Soo - Certificate of Exemption from Di... 1912 - 1912 \n", "20130 Arrival of Chinese passengers ex S.S. Taiping 1932 - 1932 \n", "20152 Ah Lock, Application for C.E.D.T [includes pho... 1916 - 1916 \n", "20158 Ah Chow, Application for C.E.D.T [includes pho... 1916 - 1916 \n", "20159 Collector of Customs - Melbourne - Five Chines... 1922 - 1922 \n", "20160 C.E.D.T Book 259 Number 72 relating to Chinese... 1922 - 1922 \n", "20161 Ah Louey; Application for C.E.D.T [includes p... 1913 - 1914 \n", "20166 Ah hing and Tang Cheong; Deserters from the S.... 1924 - 1924 \n", "20179 Ah Hing - C.E.D.T in favour, leaving port Melb... 
1926 - 1926 \n", "20182 Ah Jick - Application for C.E.D.T 1926 - 1926 \n", "20185 Ah Jim; Application for C.E.D.T 1929 - 1939 \n", "\n", " start_date end_date access_status location digitised_status \\\n", "494 1912-01-01 1912-01-01 Open Melbourne False \n", "496 1912-01-01 1912-01-01 Open Melbourne False \n", "499 1912-01-01 1912-01-01 Open Melbourne False \n", "501 1912-01-01 1912-01-01 Open Melbourne False \n", "506 1911-01-01 1912-01-01 Open Melbourne False \n", "508 1911-01-01 1913-01-01 Open Melbourne False \n", "509 1911-01-01 1913-01-01 Open Melbourne False \n", "511 1912-01-01 1912-01-01 Open Melbourne False \n", "513 1911-01-01 1913-01-01 Open Melbourne False \n", "514 1911-01-01 1911-01-01 Open Melbourne False \n", "516 1910-01-01 1912-01-01 Open Melbourne False \n", "519 1911-01-01 1912-01-01 Open Melbourne False \n", "520 1911-01-01 1914-01-01 Open Melbourne False \n", "527 1903-01-01 1903-01-01 Open Melbourne False \n", "528 1903-01-01 1903-01-01 Open Melbourne False \n", "530 1902-01-01 1902-01-01 Open Melbourne False \n", "534 1902-01-01 1902-01-01 Open Melbourne False \n", "535 1902-01-01 1902-01-01 Open Melbourne False \n", "536 1902-01-01 1902-01-01 Open Melbourne False \n", "537 1902-01-01 1902-01-01 Open Melbourne False \n", "540 1902-01-01 1902-01-01 Open Melbourne False \n", "541 1902-01-01 1902-01-01 Open Melbourne False \n", "542 1902-01-01 1902-01-01 Open Melbourne False \n", "544 1919-01-01 1919-01-01 Open Melbourne False \n", "548 1905-01-01 1905-01-01 Open Melbourne False \n", "549 1908-01-01 1908-01-01 Open Melbourne False \n", "557 1909-01-01 1909-01-01 Open Melbourne False \n", "558 1909-01-01 1909-01-01 Open Melbourne False \n", "562 1909-01-01 1909-01-01 Open Melbourne False \n", "567 1906-01-01 1906-01-01 Open Melbourne False \n", "... ... ... ... ... ... 
\n", "19827 1923-01-01 1923-01-01 Open Melbourne False \n", "19829 1924-01-01 1924-01-01 Open Melbourne False \n", "19836 1924-01-01 1924-01-01 Open Melbourne False \n", "19842 1926-01-01 1926-01-01 Open Melbourne False \n", "19843 1926-01-01 1926-01-01 Open Melbourne False \n", "19856 1926-01-01 1927-01-01 Open Melbourne False \n", "19863 1925-01-01 1930-01-01 Open Melbourne False \n", "19913 1932-01-01 1932-01-01 Open Melbourne False \n", "19917 1932-01-01 1932-01-01 Open Melbourne False \n", "19954 1936-01-01 1936-01-01 Open Melbourne False \n", "19986 1925-01-01 1927-01-01 Open Melbourne False \n", "19990 1925-01-01 1925-01-01 Open Melbourne False \n", "19995 1925-01-01 1926-01-01 Open Melbourne False \n", "20043 1933-01-01 1933-01-01 Open Melbourne False \n", "20061 1926-01-01 1940-01-01 Open Melbourne False \n", "20095 1927-01-01 1933-01-01 Open Melbourne False \n", "20115 1898-01-01 1946-01-01 Open Melbourne False \n", "20117 1926-01-01 1933-01-01 Open Melbourne False \n", "20123 1910-01-01 1916-01-01 Open Melbourne False \n", "20127 1912-01-01 1912-01-01 Open Melbourne False \n", "20130 1932-01-01 1932-01-01 Open Melbourne False \n", "20152 1916-01-01 1916-01-01 Open Melbourne False \n", "20158 1916-01-01 1916-01-01 Open Melbourne False \n", "20159 1922-01-01 1922-01-01 Open Melbourne False \n", "20160 1922-01-01 1922-01-01 Open Melbourne False \n", "20161 1913-01-01 1914-01-01 Open Melbourne False \n", "20166 1924-01-01 1924-01-01 Open Melbourne False \n", "20179 1926-01-01 1926-01-01 Open Melbourne False \n", "20182 1926-01-01 1926-01-01 Open Melbourne False \n", "20185 1929-01-01 1939-01-01 Open Melbourne False \n", "\n", " digitised_pages \n", "494 0 \n", "496 0 \n", "499 0 \n", "501 0 \n", "506 0 \n", "508 0 \n", "509 0 \n", "511 0 \n", "513 0 \n", "514 0 \n", "516 0 \n", "519 0 \n", "520 0 \n", "527 0 \n", "528 0 \n", "530 0 \n", "534 0 \n", "535 0 \n", "536 0 \n", "537 0 \n", "540 0 \n", "541 0 \n", "542 0 \n", "544 0 \n", "548 0 \n", "549 0 \n", 
"557 0 \n", "558 0 \n", "562 0 \n", "567 0 \n", "... ... \n", "19827 0 \n", "19829 0 \n", "19836 0 \n", "19842 0 \n", "19843 0 \n", "19856 0 \n", "19863 0 \n", "19913 0 \n", "19917 0 \n", "19954 0 \n", "19986 0 \n", "19990 0 \n", "19995 0 \n", "20043 0 \n", "20061 0 \n", "20095 0 \n", "20115 0 \n", "20117 0 \n", "20123 0 \n", "20127 0 \n", "20130 0 \n", "20152 0 \n", "20158 0 \n", "20159 0 \n", "20160 0 \n", "20161 0 \n", "20166 0 \n", "20179 0 \n", "20182 0 \n", "20185 0 \n", "\n", "[2457 rows x 11 columns]" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Find titles containing one of two words -- ie an OR statement\n", "# Try changing this to filter for other words\n", "\n", "df_filtered = df.loc[df['title'].str.contains('chinese', case=False) | df['title'].str.contains(r'\\bah\\b', case=False)].copy()\n", "df_filtered" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Filter by date range" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifierseriescontrol_symboltitlecontents_datesstart_dateend_dateaccess_statuslocationdigitised_statusdigitised_pages
0787258B131924/7516Charlie Lam Sun (Charlie Shack Mayberry) - Arr...1924 - circa19241924-01-011924-01-01OpenMelbourneFalse0
1790335B131926/6755Edward Traynor - permission to enter Australia...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
397405608B131930/7915Prospective Italian Migrants: Family of Pitron...1930 - 19301930-01-011930-01-01OpenMelbourneFalse0
398405614B131930/14816Alien Migration to Australia: Landing Money re...1928 - 19301928-01-011930-01-01OpenMelbourneFalse0
399405618B131930/18541Passengers for New Zealand aboard R.M.S. \"Orvi...1930 - 19301930-01-011930-01-01OpenMelbourneFalse0
470406689B131930/16951Immigration Act - 1901-1925 - Deportation for ...1927 - 19301927-01-011930-01-01OpenMelbourneFalse0
619407878B131926/13464Mowsey Inagaki, ex S.S. \"Tango Maru\",1924 - 19261924-01-011926-01-01OpenMelbourneTrue4
620407909B131929/16370Lazare Morel, Mauritius deserter ex \"King John...1929 - 19291929-01-011929-01-01OpenMelbourneFalse0
639408060B131930/510Request by Customs, N.S.W., for verification a...1930 - 19301930-01-011930-01-01OpenMelbourneFalse0
640408067B131930/9190Alfredo Debono - Contract immigrant ex S.S. \"B...1930 - 19301930-01-011930-01-01OpenMelbourneFalse0
641408074B131929/12987Emmanuel Vassalo - restricted crew member S.S....1929 - 19291929-01-011929-01-01OpenMelbourneFalse0
642408080B131927/18773Application from Joseph Gauci to bring nephew,...1927 - 19271927-01-011927-01-01OpenMelbourneFalse0
643408083B131927/25816Re Francis Grech, Maltese1927 - 19271927-01-011927-01-01OpenMelbourneFalse0
644408086B131927/9675Michael Caruana - re endorsement/renewal of pa...1922 - 19271922-01-011927-01-01OpenMelbourneFalse0
645408092B131927/18178Admission of Maltese into Commonwealth1920 - 19271920-01-011927-01-01OpenMelbourneFalse0
646408106B131927/341Salvatore Camillori: departure per S.S. \"Regin...1926 - 19271926-01-011927-01-01OpenMelbourneFalse0
647408111B131927/2190Carmelo Meilak, Maltese - Deported per R.M.S. ...1927 - 19271927-01-011927-01-01OpenMelbourneFalse0
648408115B131925/7385Arrival of 34 Maltese on \"Ville De Verdun\", \"R...1925 - 19251925-01-011925-01-01OpenMelbourneFalse0
649408125B131926/10370Maltese passengers ex S.S. \"Ville de Verdun\"1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
650408159B131923/6818Joseph Farregan, Maltese seaman ex S.S. \"Gilgai\"1923 - 19241923-01-011924-01-01OpenMelbourneFalse0
651408170B131925/4224Application by Joseph Cassar - to bring brothe...1925 - 19251925-01-011925-01-01OpenMelbourneFalse0
652408178B131929/7535Arrival S.S. \"Citta di Genova\", Apr. 1929: Thr...1929 - 19291929-01-011929-01-01OpenMelbourneFalse0
653408195B131926/3599Atto di Chiamata Forms - Italian passengers ex...1925 - 19261925-01-011926-01-01OpenMelbourneFalse0
654408223B131926/21135Atto Di Chiamata Forms for Italian passengers ...1925 - 19261925-01-011926-01-01OpenMelbourneFalse0
655408232B131928/1846Atto Di Chiamata Forms - Italian passengers ex...1926 - 19271926-01-011927-01-01OpenMelbourneFalse0
656408240B131926/794Kichiji Owa: Certificate of exemption - visit ...1925 - 19261925-01-011926-01-01OpenMelbourneFalse0
657408245B131926/4652Passenger on S.S.\"Regina D'Italia\": Michele La...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
658408252B131926/16579Sponsorship of Italian Migrant: Mrs Filomena G...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
659408258B131926/16580Sponsorship of English Migrant: Mrs. Neilson1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
660408266B131927/21931Prospective deportation of Italian Migrant: Gi...1927 - 19271927-01-011927-01-01OpenMelbourneFalse0
....................................
2014830762917B131925/6271Leslie Macdonald - arrival per Beltana 17 Augu...1925 - 19291925-01-011929-01-01OpenMelbourneFalse0
2015930762929B131922/9340Collector of Customs - Melbourne - Five Chines...1922 - 19221922-01-011922-01-01OpenMelbourneFalse0
2016030762930B131922/9690C.E.D.T Book 259 Number 72 relating to Chinese...1922 - 19221922-01-011922-01-01OpenMelbourneFalse0
2016230762936B131923/782William Dalton ex \"Largs Bay\"; Application for...1923 - 19231923-01-011923-01-01OpenMelbourneFalse0
2016330762937B131923/5170Report by the boarding inspector regarding E. ...1923 - 19231923-01-011923-01-01OpenMelbourneFalse0
2016430762939B131923/18291Mrs. Mary Ann Hamilton arriving Melbourne per ...1923 - 19241923-01-011924-01-01OpenMelbourneFalse0
2016530762940B131923/18459Restricted Passengers S.S. Ulysses; Miss Curry...1923 - 19231923-01-011923-01-01OpenMelbourneFalse0
2016630762942B131924/8009Ah hing and Tang Cheong; Deserters from the S....1924 - 19241924-01-011924-01-01OpenMelbourneFalse0
2016730762943B131925/9274Letter of Admission for Miss Isabella Scott, T...1925 - 19251925-01-011925-01-01OpenMelbourneFalse0
2016830762944B131925/18878Mrs. Sarah Hayhurst; permission to disembark a...1925 - 19251925-01-011925-01-01OpenMelbourneFalse0
2016930762945B131925/19725William Dearing, Permission to disembark in Me...1925 - 19251925-01-011925-01-01OpenMelbourneFalse0
2017130762948B131925/28776Passengers of the S.S. Gascoyne; Luada, Bux, K...1925 - 19251925-01-011925-01-01OpenMelbourneFalse0
2017230762949B131926/5387Ruth Ellen ROgers, Auckland, New Zealand - Req...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2017330762950B131926/5504Application for return of Passport/Permit - Go...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2017430762952B131926/8214Atto di Chiamata forms for Italians ex \"Orama\"...1925 - 19261925-01-011926-01-01OpenMelbourneFalse0
2017530762953B131926/10418Departure of A.Baldiserra aboard S.S. Orsova o...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2017630762954B131926/12411William Mark Snow, Prohibited passanger aboard...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2017730762955B131926/12533Thomas D.L. Canning; ex S.S. 'Esperance Bay' f...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2017830762956B131926/18497Alexander Martin - Ex SS Balranald, boarded SS...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2017930762957B131926/24036Ah Hing - C.E.D.T in favour, leaving port Melb...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2018030762958B131926/25071Crispino Bedont - Leaving Commonwealth aboard ...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2018230762960B131926/26816Ah Jick - Application for C.E.D.T1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2018330762961B131926/29708Wong Kie; Application for C.E.D.T [Includes tw...1926 - 19261926-01-011926-01-01OpenMelbourneFalse0
2018430762962B131927/18772T. Sugimoto ex S.S. \"Aki Maru\"1927 - 19271927-01-011927-01-01OpenMelbourneFalse0
2018730783406B131927/26439Restricted persons - RMS Mooltan1927 - 19271927-01-011927-01-01OpenMelbourneFalse0
2018830783541B131925/1448Mrs Antonia Joseph - arrival and departure per...1925 - 19251925-01-011925-01-01OpenMelbourneFalse0
2018960184762B131922/7845Fazal Deen - application for extension of Cert...1922 - circa19221922-01-011922-01-01OpenMelbourneTrue1
2019060184763B131922/7845Fazal Deen - application for extension of Cert...1922 - circa19221922-01-011922-01-01OpenMelbourneTrue1
2019160184764B131922/7845Fazal Deen - application for extension of Cert...1922 - circa19221922-01-011922-01-01OpenMelbourneFalse0
2019260184765B131922/7845Fazal Deen - application for extension of Cert...1922 - circa19221922-01-011922-01-01OpenMelbourneTrue1
\n", "

9570 rows × 11 columns

\n", "
" ], "text/plain": [ " identifier series control_symbol \\\n", "0 787258 B13 1924/7516 \n", "1 790335 B13 1926/6755 \n", "397 405608 B13 1930/7915 \n", "398 405614 B13 1930/14816 \n", "399 405618 B13 1930/18541 \n", "470 406689 B13 1930/16951 \n", "619 407878 B13 1926/13464 \n", "620 407909 B13 1929/16370 \n", "639 408060 B13 1930/510 \n", "640 408067 B13 1930/9190 \n", "641 408074 B13 1929/12987 \n", "642 408080 B13 1927/18773 \n", "643 408083 B13 1927/25816 \n", "644 408086 B13 1927/9675 \n", "645 408092 B13 1927/18178 \n", "646 408106 B13 1927/341 \n", "647 408111 B13 1927/2190 \n", "648 408115 B13 1925/7385 \n", "649 408125 B13 1926/10370 \n", "650 408159 B13 1923/6818 \n", "651 408170 B13 1925/4224 \n", "652 408178 B13 1929/7535 \n", "653 408195 B13 1926/3599 \n", "654 408223 B13 1926/21135 \n", "655 408232 B13 1928/1846 \n", "656 408240 B13 1926/794 \n", "657 408245 B13 1926/4652 \n", "658 408252 B13 1926/16579 \n", "659 408258 B13 1926/16580 \n", "660 408266 B13 1927/21931 \n", "... ... ... ... 
\n", "20148 30762917 B13 1925/6271 \n", "20159 30762929 B13 1922/9340 \n", "20160 30762930 B13 1922/9690 \n", "20162 30762936 B13 1923/782 \n", "20163 30762937 B13 1923/5170 \n", "20164 30762939 B13 1923/18291 \n", "20165 30762940 B13 1923/18459 \n", "20166 30762942 B13 1924/8009 \n", "20167 30762943 B13 1925/9274 \n", "20168 30762944 B13 1925/18878 \n", "20169 30762945 B13 1925/19725 \n", "20171 30762948 B13 1925/28776 \n", "20172 30762949 B13 1926/5387 \n", "20173 30762950 B13 1926/5504 \n", "20174 30762952 B13 1926/8214 \n", "20175 30762953 B13 1926/10418 \n", "20176 30762954 B13 1926/12411 \n", "20177 30762955 B13 1926/12533 \n", "20178 30762956 B13 1926/18497 \n", "20179 30762957 B13 1926/24036 \n", "20180 30762958 B13 1926/25071 \n", "20182 30762960 B13 1926/26816 \n", "20183 30762961 B13 1926/29708 \n", "20184 30762962 B13 1927/18772 \n", "20187 30783406 B13 1927/26439 \n", "20188 30783541 B13 1925/1448 \n", "20189 60184762 B13 1922/7845 \n", "20190 60184763 B13 1922/7845 \n", "20191 60184764 B13 1922/7845 \n", "20192 60184765 B13 1922/7845 \n", "\n", " title contents_dates \\\n", "0 Charlie Lam Sun (Charlie Shack Mayberry) - Arr... 1924 - circa1924 \n", "1 Edward Traynor - permission to enter Australia... 1926 - 1926 \n", "397 Prospective Italian Migrants: Family of Pitron... 1930 - 1930 \n", "398 Alien Migration to Australia: Landing Money re... 1928 - 1930 \n", "399 Passengers for New Zealand aboard R.M.S. \"Orvi... 1930 - 1930 \n", "470 Immigration Act - 1901-1925 - Deportation for ... 1927 - 1930 \n", "619 Mowsey Inagaki, ex S.S. \"Tango Maru\", 1924 - 1926 \n", "620 Lazare Morel, Mauritius deserter ex \"King John... 1929 - 1929 \n", "639 Request by Customs, N.S.W., for verification a... 1930 - 1930 \n", "640 Alfredo Debono - Contract immigrant ex S.S. \"B... 1930 - 1930 \n", "641 Emmanuel Vassalo - restricted crew member S.S.... 1929 - 1929 \n", "642 Application from Joseph Gauci to bring nephew,... 
1927 - 1927 \n", "643 Re Francis Grech, Maltese 1927 - 1927 \n", "644 Michael Caruana - re endorsement/renewal of pa... 1922 - 1927 \n", "645 Admission of Maltese into Commonwealth 1920 - 1927 \n", "646 Salvatore Camillori: departure per S.S. \"Regin... 1926 - 1927 \n", "647 Carmelo Meilak, Maltese - Deported per R.M.S. ... 1927 - 1927 \n", "648 Arrival of 34 Maltese on \"Ville De Verdun\", \"R... 1925 - 1925 \n", "649 Maltese passengers ex S.S. \"Ville de Verdun\" 1926 - 1926 \n", "650 Joseph Farregan, Maltese seaman ex S.S. \"Gilgai\" 1923 - 1924 \n", "651 Application by Joseph Cassar - to bring brothe... 1925 - 1925 \n", "652 Arrival S.S. \"Citta di Genova\", Apr. 1929: Thr... 1929 - 1929 \n", "653 Atto di Chiamata Forms - Italian passengers ex... 1925 - 1926 \n", "654 Atto Di Chiamata Forms for Italian passengers ... 1925 - 1926 \n", "655 Atto Di Chiamata Forms - Italian passengers ex... 1926 - 1927 \n", "656 Kichiji Owa: Certificate of exemption - visit ... 1925 - 1926 \n", "657 Passenger on S.S.\"Regina D'Italia\": Michele La... 1926 - 1926 \n", "658 Sponsorship of Italian Migrant: Mrs Filomena G... 1926 - 1926 \n", "659 Sponsorship of English Migrant: Mrs. Neilson 1926 - 1926 \n", "660 Prospective deportation of Italian Migrant: Gi... 1927 - 1927 \n", "... ... ... \n", "20148 Leslie Macdonald - arrival per Beltana 17 Augu... 1925 - 1929 \n", "20159 Collector of Customs - Melbourne - Five Chines... 1922 - 1922 \n", "20160 C.E.D.T Book 259 Number 72 relating to Chinese... 1922 - 1922 \n", "20162 William Dalton ex \"Largs Bay\"; Application for... 1923 - 1923 \n", "20163 Report by the boarding inspector regarding E. ... 1923 - 1923 \n", "20164 Mrs. Mary Ann Hamilton arriving Melbourne per ... 1923 - 1924 \n", "20165 Restricted Passengers S.S. Ulysses; Miss Curry... 1923 - 1923 \n", "20166 Ah hing and Tang Cheong; Deserters from the S.... 1924 - 1924 \n", "20167 Letter of Admission for Miss Isabella Scott, T... 1925 - 1925 \n", "20168 Mrs. 
Sarah Hayhurst; permission to disembark a... 1925 - 1925 \n", "20169 William Dearing, Permission to disembark in Me... 1925 - 1925 \n", "20171 Passengers of the S.S. Gascoyne; Luada, Bux, K... 1925 - 1925 \n", "20172 Ruth Ellen ROgers, Auckland, New Zealand - Req... 1926 - 1926 \n", "20173 Application for return of Passport/Permit - Go... 1926 - 1926 \n", "20174 Atto di Chiamata forms for Italians ex \"Orama\"... 1925 - 1926 \n", "20175 Departure of A.Baldiserra aboard S.S. Orsova o... 1926 - 1926 \n", "20176 William Mark Snow, Prohibited passanger aboard... 1926 - 1926 \n", "20177 Thomas D.L. Canning; ex S.S. 'Esperance Bay' f... 1926 - 1926 \n", "20178 Alexander Martin - Ex SS Balranald, boarded SS... 1926 - 1926 \n", "20179 Ah Hing - C.E.D.T in favour, leaving port Melb... 1926 - 1926 \n", "20180 Crispino Bedont - Leaving Commonwealth aboard ... 1926 - 1926 \n", "20182 Ah Jick - Application for C.E.D.T 1926 - 1926 \n", "20183 Wong Kie; Application for C.E.D.T [Includes tw... 1926 - 1926 \n", "20184 T. Sugimoto ex S.S. \"Aki Maru\" 1927 - 1927 \n", "20187 Restricted persons - RMS Mooltan 1927 - 1927 \n", "20188 Mrs Antonia Joseph - arrival and departure per... 1925 - 1925 \n", "20189 Fazal Deen - application for extension of Cert... 1922 - circa1922 \n", "20190 Fazal Deen - application for extension of Cert... 1922 - circa1922 \n", "20191 Fazal Deen - application for extension of Cert... 1922 - circa1922 \n", "20192 Fazal Deen - application for extension of Cert... 
1922 - circa1922 \n", "\n", " start_date end_date access_status location digitised_status \\\n", "0 1924-01-01 1924-01-01 Open Melbourne False \n", "1 1926-01-01 1926-01-01 Open Melbourne False \n", "397 1930-01-01 1930-01-01 Open Melbourne False \n", "398 1928-01-01 1930-01-01 Open Melbourne False \n", "399 1930-01-01 1930-01-01 Open Melbourne False \n", "470 1927-01-01 1930-01-01 Open Melbourne False \n", "619 1924-01-01 1926-01-01 Open Melbourne True \n", "620 1929-01-01 1929-01-01 Open Melbourne False \n", "639 1930-01-01 1930-01-01 Open Melbourne False \n", "640 1930-01-01 1930-01-01 Open Melbourne False \n", "641 1929-01-01 1929-01-01 Open Melbourne False \n", "642 1927-01-01 1927-01-01 Open Melbourne False \n", "643 1927-01-01 1927-01-01 Open Melbourne False \n", "644 1922-01-01 1927-01-01 Open Melbourne False \n", "645 1920-01-01 1927-01-01 Open Melbourne False \n", "646 1926-01-01 1927-01-01 Open Melbourne False \n", "647 1927-01-01 1927-01-01 Open Melbourne False \n", "648 1925-01-01 1925-01-01 Open Melbourne False \n", "649 1926-01-01 1926-01-01 Open Melbourne False \n", "650 1923-01-01 1924-01-01 Open Melbourne False \n", "651 1925-01-01 1925-01-01 Open Melbourne False \n", "652 1929-01-01 1929-01-01 Open Melbourne False \n", "653 1925-01-01 1926-01-01 Open Melbourne False \n", "654 1925-01-01 1926-01-01 Open Melbourne False \n", "655 1926-01-01 1927-01-01 Open Melbourne False \n", "656 1925-01-01 1926-01-01 Open Melbourne False \n", "657 1926-01-01 1926-01-01 Open Melbourne False \n", "658 1926-01-01 1926-01-01 Open Melbourne False \n", "659 1926-01-01 1926-01-01 Open Melbourne False \n", "660 1927-01-01 1927-01-01 Open Melbourne False \n", "... ... ... ... ... ... 
\n", "20148 1925-01-01 1929-01-01 Open Melbourne False \n", "20159 1922-01-01 1922-01-01 Open Melbourne False \n", "20160 1922-01-01 1922-01-01 Open Melbourne False \n", "20162 1923-01-01 1923-01-01 Open Melbourne False \n", "20163 1923-01-01 1923-01-01 Open Melbourne False \n", "20164 1923-01-01 1924-01-01 Open Melbourne False \n", "20165 1923-01-01 1923-01-01 Open Melbourne False \n", "20166 1924-01-01 1924-01-01 Open Melbourne False \n", "20167 1925-01-01 1925-01-01 Open Melbourne False \n", "20168 1925-01-01 1925-01-01 Open Melbourne False \n", "20169 1925-01-01 1925-01-01 Open Melbourne False \n", "20171 1925-01-01 1925-01-01 Open Melbourne False \n", "20172 1926-01-01 1926-01-01 Open Melbourne False \n", "20173 1926-01-01 1926-01-01 Open Melbourne False \n", "20174 1925-01-01 1926-01-01 Open Melbourne False \n", "20175 1926-01-01 1926-01-01 Open Melbourne False \n", "20176 1926-01-01 1926-01-01 Open Melbourne False \n", "20177 1926-01-01 1926-01-01 Open Melbourne False \n", "20178 1926-01-01 1926-01-01 Open Melbourne False \n", "20179 1926-01-01 1926-01-01 Open Melbourne False \n", "20180 1926-01-01 1926-01-01 Open Melbourne False \n", "20182 1926-01-01 1926-01-01 Open Melbourne False \n", "20183 1926-01-01 1926-01-01 Open Melbourne False \n", "20184 1927-01-01 1927-01-01 Open Melbourne False \n", "20187 1927-01-01 1927-01-01 Open Melbourne False \n", "20188 1925-01-01 1925-01-01 Open Melbourne False \n", "20189 1922-01-01 1922-01-01 Open Melbourne True \n", "20190 1922-01-01 1922-01-01 Open Melbourne True \n", "20191 1922-01-01 1922-01-01 Open Melbourne False \n", "20192 1922-01-01 1922-01-01 Open Melbourne True \n", "\n", " digitised_pages \n", "0 0 \n", "1 0 \n", "397 0 \n", "398 0 \n", "399 0 \n", "470 0 \n", "619 4 \n", "620 0 \n", "639 0 \n", "640 0 \n", "641 0 \n", "642 0 \n", "643 0 \n", "644 0 \n", "645 0 \n", "646 0 \n", "647 0 \n", "648 0 \n", "649 0 \n", "650 0 \n", "651 0 \n", "652 0 \n", "653 0 \n", "654 0 \n", "655 0 \n", "656 0 \n", "657 0 
# Keep only the files whose whole date range falls inside the chosen period.
# The bounds are year strings that pandas compares against the parsed datetime
# columns; a bare year is read as 1 January of that year, so end_year matches
# end dates on or before 1 Jan of that year.
start_year = '1920'
end_year = '1930'
df_filtered = df[(df['start_date'] >= start_year) & (df['end_date'] <= end_year)]
df_filtered

# ## N-gram frequencies in file titles

# Import TextBlob for text analysis
from textblob import TextBlob
import nltk
# NOTE(review): presumably requires the nltk 'stopwords' corpus to be downloaded already — confirm
stopwords = nltk.corpus.stopwords.words('english')

# Combine all of the file titles into a single lower-cased string.
# (The original also bound the result to a throw-away name `a`; that stray
# alias is dropped so it no longer leaks into the notebook namespace.)
title_text = df['title'].str.lower().str.cat(sep=' ')
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordcount
7per5,172
314ex4,363
902exemption3,688
618certificate3,577
1655dictation3,577
1581test3,553
71melbourne3,168
538application2,442
174departure2,006
14australia1,977
1543ah1,796
949passengers1,620
173arrival1,560
26ltd1,446
104act1,180
1482mrs1,175
6sydney1,075
1861s.s1,074
12permission1,050
830crew1,015
25pty950
1621applied927
1583chinese862
2049enemy858
24co835
# Count how often each word occurs across all titles, skipping English stopwords.
blob = TextBlob(title_text)
# A set gives constant-time membership tests; the stopword list would
# otherwise be scanned once for every distinct word in the titles.
stopword_set = set(stopwords)
words = [[word, count] for word, count in blob.lower().word_counts.items() if word not in stopword_set]
# Naming the columns in the constructor keeps this working even when no words survive the filter.
word_counts = pd.DataFrame(words, columns=['word', 'count']).sort_values(by='count', ascending=False)
# Show the 25 most frequent words with an in-cell bar drawn behind each count.
word_counts[:25].style.format({'count': '{:,}'}).bar(subset=['count'], color='#d65f5f').set_properties(subset=['count'], **{'width': '300px'})


def get_ngram_counts(text, size):
    """Count the n-grams of a given size in a piece of text.

    Parameters:
        text: the text to analyse (a single string).
        size: the number of words in each n-gram (2 = bigrams, 3 = trigrams, ...).

    Returns:
        A dataframe with one row per distinct n-gram and two columns, 'ngram'
        and 'count', ordered from most to least frequent. Empty input yields
        an empty dataframe with the same columns.
    """
    blob = TextBlob(text)
    # Lower-case once on the blob; each n-gram comes back as a WordList that we join into a string.
    ngrams = [' '.join(ngram) for ngram in blob.lower().ngrams(size)]
    # Build a Series directly (rather than taking column 0 of a DataFrame) so an
    # empty n-gram list produces an empty result instead of a KeyError.
    return (
        pd.Series(ngrams, dtype='object')
        .value_counts()
        .rename_axis('ngram')
        .reset_index(name='count')
    )


def display_top_ngrams(text, size, top=25):
    """Display the most frequent n-grams in `text` as a table with inline bars.

    Parameters:
        text: the text to analyse (a single string).
        size: the number of words in each n-gram. This is passed through to
            get_ngram_counts, so asking for size 6 really does produce 6-grams.
        top: how many of the most frequent n-grams to show (default 25).
    """
    ngram_counts = get_ngram_counts(text, size)
    # Render the top rows with thousands separators and a bar behind each count.
    display(
        ngram_counts[:top]
        .style.format({'count': '{:,}'})
        .bar(subset=['count'], color='#d65f5f')
        .set_properties(subset=['count'], **{'width': '300px'})
    )
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ngramcount
0exemption from3,550
1from dictation3,540
2dictation test3,533
3for exemption3,004
4certificate for2,872
5for certificate2,660
6application for2,236
7melbourne per1,054
8departure per1,009
9pty ltd927
10applied for923
11to australia859
12trading with782
13enemy act765
14with enemy764
15test ah735
16permission to698
17act 1939691
18of exemption636
19certificate of625
20crew member623
21arrival per458
22to enter437
23of certificate419
24passengers melbourne390
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display_top_ngrams(title_text, 2)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ngramcount
0exemption from3,550
1from dictation3,540
2dictation test3,533
3for exemption3,004
4certificate for2,872
5for certificate2,660
6application for2,236
7melbourne per1,054
8departure per1,009
9pty ltd927
10applied for923
11to australia859
12trading with782
13enemy act765
14with enemy764
15test ah735
16permission to698
17act 1939691
18of exemption636
19certificate of625
20crew member623
21arrival per458
22to enter437
23of certificate419
24passengers melbourne390
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display_top_ngrams(title_text, 6)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }