{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "series = 'ST84/1'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "import pandas as pd\n", "import series_details\n", "import plotly.offline as py\n", "py.init_notebook_mode()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(os.path.join('data', '{}.csv'.format(series.replace('/', '-'))), parse_dates=['start_date', 'end_date'])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "

National Archives of Australia: Series ST84/1

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

Certificates of Domicile and Certificates of Exemption from Dictation Test, chronological series

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Total items2,765
Access status
Open2,758 (99.75%)
Not yet examined7 (0.25%)
Number of items digitised434 (15.70%)
Number of pages digitised13,979
Date of earliest content1855
Date of latest content1975

Download the complete CSV file

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "series_details.display_summary(series, df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Content preview" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifierseriescontrol_symboltitlecontents_datesstart_dateend_dateaccess_statuslocationdigitised_statusdigitised_pages
01731871ST84/11907/391-400James Lee Chong, Way Sing, Walter Hing Hee, Ah See, Charlie Joy, Nicholas Saseen, Foo Jun, Hop Sing, Sun Sing Lee and Jack Hoy [Certificate Exempting from Dictation Test - includes left hand impression and photographs] [box 16]1907 - 19071907-01-01 00:00:001907-01-01 00:00:00OpenSydneyTrue34
17288001ST84/11919/270/81-90Jong Say, Wong Kwong, Lee You Wing, Foo Gun, Mar Kum, Gock Buck, Ah Get, Jeong Keong, Percy Zuinn and Ah Yum [Certificate Exempting from Dictation Test - includes left hand impression and photographs] [box 122]1919 - 19191919-01-01 00:00:001919-01-01 00:00:00OpenSydneyFalse0
27288002ST84/11919/270/91-100Ming Gar, Ah Loong, Lun Soy, Gung Sun, Ah Lock, John Nop or Jan Nap, Peter Sing, Louie Wee, Sue Hoo and Lee Yuen [Certificate Exempting from Dictation Test - includes left hand impression and photographs] [box 122]1919 - 19191919-01-01 00:00:001919-01-01 00:00:00OpenSydneyFalse0
37288003ST84/11919/271/1-10Charley Eip, Chew Bun, Lee Chut, Lum Gow, Tommy Low, Low Zuai, Charlie Gong or Charlie Kwong, Lee So, Chi Wort and Go Foo [Certificate Exempting from Dictation Test - includes left hand impression and photographs] [box 123]1919 - 19191919-01-01 00:00:001919-01-01 00:00:00OpenSydneyFalse0
47288004ST84/11919/271/11-20Dewan Singh, Joseph Sequiera, Lee Gum Sue, Ah Suey, Fong Foon, Ah Seck, Man Duck, Lee Tim and Ah Moon [Certificate Exempting from Dictation Test - includes left hand impression and photographs] [box 123]1919 - 19191919-01-01 00:00:001919-01-01 00:00:00OpenSydneyFalse0
" ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Change the number_of_rows value to see more\n", "number_of_rows = 5\n", "\n", "# Display dataframe \n", "df[:number_of_rows].style.set_properties(['title'], **{'text-align': 'left'}).set_table_styles([dict(selector=\"th\", props=[(\"text-align\", \"center\")]),\n", " dict(selector='.row_heading, .blank', props=[('display', 'none')])])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plot content dates" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "name": "Digitised", "type": "bar", "x": [ 1855, 1856, 1857, 1858, 1859, 1860, 1861, 1862, 1863, 1864, 1865, 1866, 1867, 1868, 1869, 1870, 1871, 1872, 1873, 1874, 1875, 1876, 1877, 1878, 1879, 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953 ], "y": [ 1, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 6, 6, 13, 13, 14, 18, 24, 27, 35, 38, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 29, 37, 46, 58, 72, 54, 17, 7, 10, 4, 6, 5, 7, 4, 6, 5, 7, 4, 5, 3, 2, 1, 3, 5, 3, 7, 4, 4, 1, 2, 2, 3, 3, 4, 3, 3, 3, 2, 2, 1, 1, 1, 6, 1, 2, 1, 1, 1, 1, 1 ] }, { "name": "Not digitised", "type": "bar", "x": [ 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1889, 1890, 1891, 1892, 1893, 1894, 1895, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1975 ], "y": [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 5, 2, 2, 4, 3, 26, 74, 83, 97, 111, 103, 111, 95, 73, 63, 83, 83, 113, 85, 100, 98, 77, 99, 102, 85, 74, 71, 38, 38, 38, 39, 38, 42, 36, 29, 29, 23, 22, 2, 2, 2, 2, 36, 27, 9, 9, 11, 6, 3, 2, 1 ] } ], "layout": { "barmode": "stack", "title": "Content dates", "xaxis": { "title": "Year" }, "yaxis": { "title": "Number of items" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = series_details.plot_dates(df)\n", "py.iplot(fig, filename='series-dates-bar')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## View word frequencies" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Combine all of the file titles into a single string\n", "title_text = a = df['title'].str.lower().str.cat(sep=' ')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordcount
8ah6,070
29box2,830
20certificate2,747
1lee2,737
24includes2,735
26hand2,732
25left2,732
27impression2,732
28photographs2,715
21exempting2,646
23test2,646
22dictation2,646
2chong1,228
4sing1,171
33wong1,060
107young1,000
108yee934
110george702
170choy679
6hing653
73low650
10charlie612
70lum609
86fong580
95gee575
" ], "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "series_details.display_word_counts(title_text)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ngramcount
0includes left2,732
1hand impression2,732
2left hand2,732
3photographs box2,715
4impression and2,712
5and photographs2,712
6dictation test2,646
7exempting from2,646
8from dictation2,646
9certificate exempting2,646
10test includes2,639
11and ah518
12or ah326
13lee ah199
14ah chong173
15ah sing172
16sing ah166
17ah sam165
18and lee152
19chong ah145
20sydney nsw128
21lee and128
22lee certificate125
23ah tong110
24sing and104
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Change ngram_count for larger ngrams (trigrams etc)\n", "ngram_count = 2\n", "series_details.display_top_ngrams(title_text, ngram_count)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }