{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "series = 'SP42/1'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "import pandas as pd\n", "import series_details\n", "import plotly.offline as py\n", "py.init_notebook_mode()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(os.path.join('data', '{}.csv'.format(series.replace('/', '-'))), parse_dates=['start_date', 'end_date'])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "

National Archives of Australia: Series SP42/1

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

Correspondence of the Collector of Customs relating to Immigration Restriction and Passports

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Total items16,256
Access status
Open15,525 (95.50%)
Not yet examined731 (4.50%)
Number of items digitised3,253 (20.01%)
Number of pages digitised45,862
Date of earliest content1881
Date of latest content1960

Download the complete CSV file

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "series_details.display_summary(series, df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Content preview" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifierseriescontrol_symboltitlecontents_datesstart_dateend_dateaccess_statuslocationdigitised_statusdigitised_pages
01053878SP42/1B1906/694AH KIM [correspondence of the Collector of Customs relating to immigration restrictions] [6 pages] [box 15]1906 - 19061906-01-01 00:00:001906-01-01 00:00:00OpenSydneyTrue6
11563661SP42/1B1905/1553Ah Kong, includes photographs1905 - circa19051905-01-01 00:00:00NaTOpenSydneyTrue10
21563665SP42/1B1905/1557Ah Yet, includes photographs1905 - 19051905-01-01 00:00:001905-01-01 00:00:00OpenSydneyTrue12
31563670SP42/1B1905/1561You Gee1905 - 19051905-01-01 00:00:001905-01-01 00:00:00OpenSydneyTrue8
41563675SP42/1B1905/1565Deserters from the RMS INDIA; Meer Afzul and Ackbar Carrandad1905 - 19051905-01-01 00:00:001905-01-01 00:00:00OpenSydneyTrue17
" ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Change the number_of_rows value to see more\n", "number_of_rows = 5\n", "\n", "# Display dataframe \n", "df[:number_of_rows].style.set_properties(['title'], **{'text-align': 'left'}).set_table_styles([dict(selector=\"th\", props=[(\"text-align\", \"center\")]),\n", " dict(selector='.row_heading, .blank', props=[('display', 'none')])])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plot content dates" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "name": "Digitised", "type": "bar", "x": [ 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1953 ], "y": [ 1, 2, 7, 12, 14, 14, 14, 15, 16, 16, 16, 19, 19, 19, 19, 19, 20, 21, 30, 41, 57, 88, 296, 283, 384, 319, 465, 518, 630, 762, 708, 670, 556, 607, 675, 741, 657, 544, 451, 247, 98, 91, 87, 82, 84, 78, 71, 63, 61, 56, 51, 48, 43, 37, 34, 34, 37, 32, 33, 18, 11, 10, 7, 8, 4, 3, 1, 1 ] }, { "name": "Not digitised", "type": "bar", "x": [ 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1960 ], "y": [ 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 5, 6, 6, 8, 10, 14, 19, 34, 137, 265, 429, 691, 967, 1260, 1605, 1888, 2244, 2616, 3060, 3236, 3358, 3393, 3542, 3679, 3853, 4021, 4254, 4344, 4200, 3940, 3823, 3709, 3403, 3099, 2753, 2417, 2120, 1932, 1731, 1565, 1486, 1617, 1729, 1533, 1365, 996, 713, 473, 773, 1276, 479, 129, 26, 14, 11, 10, 10, 10, 9, 12, 7, 4, 4, 1 ] } ], "layout": { "barmode": "stack", "title": "Content dates", "xaxis": { "title": "Year" }, "yaxis": { "title": "Number of items" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = series_details.plot_dates(df)\n", "py.iplot(fig, filename='series-dates-bar')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## View word frequencies" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Combine all of the file titles into a single string\n", "title_text = a = df['title'].str.lower().str.cat(sep=' ')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordcount
10box14,077
13includes9,891
43left9,844
77prints7,827
32showing7,181
33front7,144
203ex6,808
34side6,695
35views6,590
14photographs6,521
1055thumb6,171
235right6,149
158sydney4,292
1047subject3,745
0ah3,585
39also3,409
40known3,318
1052arrived2,935
1063issue2,843
1064favour2,795
63certificate2,737
44hand2,635
3122,503
240exemption2,371
1061finger1,978
" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "series_details.display_word_counts(title_text)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ngramcount
0showing front7,109
1and side6,660
2front and6,637
3side views6,554
4and left6,514
5and right6,139
6left and6,082
7photographs showing6,072
8thumb prints6,049
9right thumb6,005
10known as3,296
11also known3,293
12views and3,244
13sydney on3,098
14prints box2,994
15of subject2,898
16issue of2,836
17in favour2,790
18favour of2,778
19in sydney2,741
20arrived ex2,694
21subject box2,596
22left hand2,585
23prints and2,262
24of exemption2,052
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Change ngram_count for larger ngrams (trigrams etc)\n", "ngram_count = 2\n", "series_details.display_top_ngrams(title_text, ngram_count)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }