{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "series = 'J2483'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "import pandas as pd\n", "import series_details\n", "import plotly.offline as py\n", "py.init_notebook_mode()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(os.path.join('data', '{}.csv'.format(series.replace('/', '-'))), parse_dates=['start_date', 'end_date'])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "

National Archives of Australia: Series J2483

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

Certificates Exempting from Dictation Test [CEDT] issued under \"The Immigration Restriction Acts 1901-1905\" and Regulations (and amending legislation), two number series

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Total items14,438
Access status
Open14,436 (99.99%)
Not yet examined2 (0.01%)
Number of items digitised14,436 (99.99%)
Number of pages digitised79,210
Date of earliest content1903
Date of latest content1956

Download the complete CSV file

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "series_details.display_summary(series, df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Content preview" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifierseriescontrol_symboltitlecontents_datesstart_dateend_dateaccess_statuslocationdigitised_statusdigitised_pages
09086001J248316/16Certificate Exempting from Dictation Test (CEDT) - Name: Yong Min - Nationality: Chinese - Birthplace: Canton - departed for China per TAIYUAN on 3 February 19091909 - 19091909-01-01 00:00:001909-01-01 00:00:00OpenBrisbaneTrue2
19086002J248316/17Certificate Exempting from Dictation Test (CEDT) - Name: Hong Chin - Nationality: Chinese - Birthplace: Canton - departed for China per TAIYUAN on 3 February 1909, returned to Cairns per EMPIRE on 16 June 19101909 - 19101909-01-01 00:00:001910-01-01 00:00:00OpenBrisbaneTrue7
29086003J248316/18Certificate Exempting from Dictation Test (CEDT) - Name: Ah Mun - Nationality: Chinese - Birthplace: Canton - departed for China per SS EASTERN on 11 June 1909, returned to Cairns per EASTERN on 22 October 19101909 - 19101909-01-01 00:00:001910-01-01 00:00:00OpenBrisbaneTrue7
39086004J248316/21Certificate Exempting from Dictation Test (CEDT) - Name: Tommy Hong - Nationality: Chinese - Birthplace: Canton - departed for China per EMPIRE on 17 February 1909, returned to Brisbane per EMPIRE on 6 November 19111909 - 19111909-01-01 00:00:001911-01-01 00:00:00OpenBrisbaneTrue7
49086005J248316/22Certificate Exempting from Dictation Test (CEDT) - Name: Duck Shan - Nationality: Chinese - Birthplace: Canton - departed for China per EMPIRE on 18 February 19091909 - 19091909-01-01 00:00:001909-01-01 00:00:00OpenBrisbaneTrue2
" ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Change the number_of_rows value to see more\n", "number_of_rows = 5\n", "\n", "# Display dataframe \n", "df[:number_of_rows].style.set_properties(['title'], **{'text-align': 'left'}).set_table_styles([dict(selector=\"th\", props=[(\"text-align\", \"center\")]),\n", " dict(selector='.row_heading, .blank', props=[('display', 'none')])])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plot content dates" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "name": "Digitised", "type": "bar", "x": [ 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956 ], "y": [ 1, 1, 2, 6, 11, 325, 944, 1375, 1604, 1863, 1875, 1775, 1950, 2064, 1786, 1809, 1785, 1784, 1881, 1771, 1588, 1382, 1181, 1310, 1255, 1119, 926, 814, 648, 565, 497, 513, 522, 462, 421, 354, 248, 160, 108, 40, 36, 32, 32, 58, 77, 51, 30, 19, 9, 3, 2, 2, 3, 2 ] }, { "name": "Not digitised", "type": "bar", "x": [ 1947, 1948, 1949, 1950, 1951 ], "y": [ 1, 1, 2, 1, 1 ] } ], "layout": { "barmode": "stack", "title": "Content dates", "xaxis": { "title": "Year" }, "yaxis": { "title": "Number of items" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = series_details.plot_dates(df)\n", "py.iplot(fig, filename='series-dates-bar')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## View word frequencies" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Combine all of the file titles into a single string\n", "title_text = a = df['title'].str.lower().str.cat(sep=' ')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordcount
14per24,566
0certificate14,547
4cedt14,452
5name14,441
1exempting14,441
2dictation14,440
3test14,440
8nationality14,364
10birthplace13,830
12departed13,119
9chinese11,753
21returned11,529
13china10,988
11canton10,350
22cairns4,777
119maru4,649
45townsville4,261
36brisbane3,882
30eastern3,139
62st2,569
46december2,563
38november2,544
63albans2,540
27ah2,302
33october2,280
" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "series_details.display_word_counts(title_text)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ngramcount
0exempting from14,441
1certificate exempting14,440
2from dictation14,440
3dictation test14,440
4test cedt14,438
5cedt name14,415
6departed for13,078
7nationality chinese11,675
8chinese birthplace11,268
9birthplace canton10,289
10for china9,148
11china per9,140
12canton departed8,297
13returned to7,009
14maru on3,819
15townsville per3,443
16brisbane per3,172
17cairns per2,994
18per eastern2,948
19st albans2,538
20eastern on2,497
21per st2,354
22to brisbane2,015
23to townsville2,002
24hong kong1,980
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Change ngram_count for larger ngrams (trigrams etc)\n", "ngram_count = 2\n", "series_details.display_top_ngrams(title_text, ngram_count)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }