{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Identifier of the archival series this notebook summarises; it also selects the CSV file loaded below\n",
"series = 'D596'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import os\n",
"\n",
"import pandas as pd\n",
"import plotly.offline as py\n",
"\n",
"import series_details\n",
"\n",
"# Initialise plotly's offline notebook mode before any figure is drawn\n",
"py.init_notebook_mode()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Load this series' CSV from the data directory; any '/' in the series id is replaced by '-' in the file name\n",
"df = pd.read_csv(\n",
"    os.path.join('data', series.replace('/', '-') + '.csv'),\n",
"    parse_dates=['start_date', 'end_date'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"
National Archives of Australia: Series D596
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Correspondence files, annual single number series
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Total items | 11,395 |
---|
Access status | |
---|
Not yet examined | 8,381 (73.55%) |
Open | 2,983 (26.18%) |
Open with exception | 31 (0.27%) |
Number of items digitised | 185 (1.62%) |
---|
Number of pages digitised | 3,031 |
---|
Date of earliest content | 1871 |
---|
Date of latest content | 1971 |
---|
Download the complete CSV file
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Show the series overview; in the saved output this is a heading, a description and a summary table\n",
"# (item totals, access status, digitisation counts, earliest/latest content dates)\n",
"series_details.display_summary(series, df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Content preview"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" identifier | \n",
" series | \n",
" control_symbol | \n",
" title | \n",
" contents_dates | \n",
" start_date | \n",
" end_date | \n",
" access_status | \n",
" location | \n",
" digitised_status | \n",
" digitised_pages | \n",
"
\n",
" \n",
" 0 | \n",
" 319709 | \n",
" D596 | \n",
" 1902/647 | \n",
" Immigration Restriction Act - Domicile Certificate | \n",
" 1902 - 1902 | \n",
" 1902-01-01 00:00:00 | \n",
" 1902-01-01 00:00:00 | \n",
" Open | \n",
" Adelaide | \n",
" False | \n",
" 0 | \n",
"
\n",
" 1 | \n",
" 319888 | \n",
" D596 | \n",
" 1908/5433 | \n",
" Chinese prohibited immigrants | \n",
" 1908 - 1908 | \n",
" 1908-01-01 00:00:00 | \n",
" 1908-01-01 00:00:00 | \n",
" Open | \n",
" Adelaide | \n",
" False | \n",
" 0 | \n",
"
\n",
" 2 | \n",
" 320267 | \n",
" D596 | \n",
" 1914/5906 | \n",
" War between Great Britain & Turkey - Proclamation | \n",
" 1914 - 1914 | \n",
" 1914-01-01 00:00:00 | \n",
" 1914-01-01 00:00:00 | \n",
" Open | \n",
" Adelaide | \n",
" False | \n",
" 0 | \n",
"
\n",
" 3 | \n",
" 320290 | \n",
" D596 | \n",
" 1914/6869 | \n",
" Proclamation extending the scope of certain existing proclamations and a certain order in Council connected with the war | \n",
" 1914 - 1914 | \n",
" 1914-01-01 00:00:00 | \n",
" 1914-01-01 00:00:00 | \n",
" Open | \n",
" Adelaide | \n",
" False | \n",
" 0 | \n",
"
\n",
" 4 | \n",
" 320382 | \n",
" D596 | \n",
" 1916/1544 | \n",
" Public Trustee Herman P ZONDER - enemy shareholder | \n",
" 1917 - 1918 | \n",
" 1917-01-01 00:00:00 | \n",
" 1918-01-01 00:00:00 | \n",
" Open | \n",
" Adelaide | \n",
" False | \n",
" 0 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Change the number_of_rows value to see more\n",
"number_of_rows = 5\n",
"\n",
"# Preview the first rows as a styled table: titles left-aligned,\n",
"# header cells centred, and the row-index column hidden via CSS\n",
"(\n",
"    df.head(number_of_rows)\n",
"    .style\n",
"    .set_properties(subset=['title'], **{'text-align': 'left'})\n",
"    .set_table_styles([\n",
"        {'selector': 'th', 'props': [('text-align', 'center')]},\n",
"        {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},\n",
"    ])\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot content dates"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"data": [
{
"name": "Digitised",
"type": "bar",
"x": [
1891,
1892,
1893,
1894,
1895,
1896,
1897,
1898,
1899,
1900,
1901,
1902,
1903,
1904,
1905,
1906,
1907,
1908,
1909,
1910,
1911,
1912,
1913,
1914,
1915,
1916,
1917,
1918,
1919,
1920,
1921,
1922,
1923,
1924,
1925,
1926,
1927,
1928,
1929,
1930,
1931,
1932,
1933,
1934,
1935,
1936,
1937,
1938,
1939,
1940,
1941,
1942,
1943,
1944,
1945,
1946,
1947,
1948,
1949,
1950,
1951,
1952,
1953,
1954,
1955,
1956,
1957,
1958,
1959,
1960,
1961,
1962,
1963,
1964,
1965
],
"y": [
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
2,
3,
3,
2,
3,
4,
4,
6,
11,
8,
7,
7,
7,
5,
8,
8,
7,
7,
9,
9,
14,
7,
9,
7,
7,
5,
5,
12,
13,
13,
13,
18,
16,
16,
17,
16,
7,
14,
22,
5,
5,
4,
4,
6,
6,
7,
5,
4,
5,
9,
11,
14,
16,
5,
5,
6,
7,
6,
5,
6,
7,
6,
4,
3,
1
]
},
{
"name": "Not digitised",
"type": "bar",
"x": [
1871,
1872,
1873,
1874,
1875,
1876,
1877,
1878,
1879,
1880,
1881,
1882,
1883,
1884,
1885,
1886,
1887,
1888,
1889,
1890,
1891,
1892,
1893,
1894,
1895,
1896,
1897,
1898,
1899,
1900,
1901,
1902,
1903,
1904,
1905,
1906,
1907,
1908,
1909,
1910,
1911,
1912,
1913,
1914,
1915,
1916,
1917,
1918,
1919,
1920,
1921,
1922,
1923,
1924,
1925,
1926,
1927,
1928,
1929,
1930,
1931,
1932,
1933,
1934,
1935,
1936,
1937,
1938,
1939,
1940,
1941,
1942,
1943,
1944,
1945,
1946,
1947,
1948,
1949,
1950,
1951,
1952,
1953,
1954,
1955,
1956,
1957,
1958,
1959,
1960,
1961,
1962,
1963,
1964,
1965,
1966,
1967,
1968,
1969,
1970,
1971
],
"y": [
19,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
4,
5,
5,
6,
5,
7,
7,
7,
7,
7,
7,
7,
7,
8,
8,
8,
8,
12,
96,
134,
112,
133,
154,
180,
168,
145,
179,
160,
179,
199,
193,
250,
282,
318,
345,
336,
335,
489,
596,
469,
448,
410,
379,
390,
751,
700,
780,
852,
781,
812,
808,
820,
798,
1177,
1078,
1237,
1230,
513,
509,
619,
617,
597,
571,
556,
648,
636,
606,
613,
705,
906,
871,
623,
473,
421,
363,
386,
347,
322,
392,
218,
110,
73,
36,
17,
7,
3,
3,
2,
1
]
}
],
"layout": {
"barmode": "stack",
"title": "Content dates",
"xaxis": {
"title": "Year"
},
"yaxis": {
"title": "Number of items"
}
}
},
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Build the content-dates figure from the dataframe; in the saved output this is a stacked bar chart\n",
"# of digitised vs not digitised items per year\n",
"fig = series_details.plot_dates(df)\n",
"# Render the figure inline using plotly's offline mode\n",
"py.iplot(fig, filename='series-dates-bar')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View word frequencies"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Combine all of the file titles into a single lower-cased string, separated by spaces,\n",
"# for the word and ngram frequency counts below\n",
"title_text = df['title'].str.lower().str.cat(sep=' ')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" word | \n",
" count | \n",
"
\n",
" \n",
" 353 | \n",
" ltd | \n",
" 946 | \n",
"
\n",
" 2 | \n",
" act | \n",
" 937 | \n",
"
\n",
" 3931 | \n",
" passport | \n",
" 935 | \n",
"
\n",
" 376 | \n",
" ss | \n",
" 813 | \n",
"
\n",
" 0 | \n",
" immigration | \n",
" 749 | \n",
"
\n",
" 241 | \n",
" co | \n",
" 739 | \n",
"
\n",
" 3893 | \n",
" classification | \n",
" 717 | \n",
"
\n",
" 207 | \n",
" adelaide | \n",
" 601 | \n",
"
\n",
" 1388 | \n",
" enquiry | \n",
" 598 | \n",
"
\n",
" 112 | \n",
" customs | \n",
" 581 | \n",
"
\n",
" 58 | \n",
" report | \n",
" 534 | \n",
"
\n",
" 5344 | \n",
" wife | \n",
" 509 | \n",
"
\n",
" 423 | \n",
" tariff | \n",
" 480 | \n",
"
\n",
" 6 | \n",
" prohibited | \n",
" 450 | \n",
"
\n",
" 106 | \n",
" mr | \n",
" 434 | \n",
"
\n",
" 206 | \n",
" port | \n",
" 427 | \n",
"
\n",
" 50 | \n",
" ex | \n",
" 426 | \n",
"
\n",
" 274 | \n",
" duty | \n",
" 413 | \n",
"
\n",
" 204 | \n",
" shipping | \n",
" 377 | \n",
"
\n",
" 3114 | \n",
" permit | \n",
" 370 | \n",
"
\n",
" 666 | \n",
" claim | \n",
" 364 | \n",
"
\n",
" 161 | \n",
" australia | \n",
" 353 | \n",
"
\n",
" 321 | \n",
" goods | \n",
" 350 | \n",
"
\n",
" 289 | \n",
" office | \n",
" 347 | \n",
"
\n",
" 298 | \n",
" regarding | \n",
" 336 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the most frequent words in the combined titles (the saved output is a word/count table)\n",
"series_details.display_word_counts(title_text)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" ngram | \n",
" count | \n",
"
\n",
" \n",
" 0 | \n",
" enquiry by | \n",
" 537 | \n",
"
\n",
" 1 | \n",
" immigration report | \n",
" 497 | \n",
"
\n",
" 2 | \n",
" classification of | \n",
" 406 | \n",
"
\n",
" 3 | \n",
" by wife | \n",
" 366 | \n",
"
\n",
" 4 | \n",
" claim no | \n",
" 325 | \n",
"
\n",
" 5 | \n",
" merchant shipping | \n",
" 282 | \n",
"
\n",
" 6 | \n",
" shipping act | \n",
" 280 | \n",
"
\n",
" 7 | \n",
" co ltd | \n",
" 270 | \n",
"
\n",
" 8 | \n",
" clearing office | \n",
" 251 | \n",
"
\n",
" 9 | \n",
" office claim | \n",
" 238 | \n",
"
\n",
" 10 | \n",
" application for | \n",
" 233 | \n",
"
\n",
" 11 | \n",
" port adelaide | \n",
" 209 | \n",
"
\n",
" 12 | \n",
" tariff classification | \n",
" 203 | \n",
"
\n",
" 13 | \n",
" prohibited publication | \n",
" 199 | \n",
"
\n",
" 14 | \n",
" immigration act | \n",
" 190 | \n",
"
\n",
" 15 | \n",
" passport enquiry | \n",
" 181 | \n",
"
\n",
" 16 | \n",
" landing permit | \n",
" 173 | \n",
"
\n",
" 17 | \n",
" at port | \n",
" 169 | \n",
"
\n",
" 18 | \n",
" official no | \n",
" 167 | \n",
"
\n",
" 19 | \n",
" report mv | \n",
" 154 | \n",
"
\n",
" 20 | \n",
" of the | \n",
" 149 | \n",
"
\n",
" 21 | \n",
" crew ss | \n",
" 137 | \n",
"
\n",
" 22 | \n",
" for duty | \n",
" 132 | \n",
"
\n",
" 23 | \n",
" pty ltd | \n",
" 130 | \n",
"
\n",
" 24 | \n",
" transfer of | \n",
" 127 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Number of words per ngram: 2 gives bigrams; increase it for larger ngrams (3 = trigrams, etc.)\n",
"ngram_count = 2\n",
"# Show the most frequent ngrams of that size in the combined titles\n",
"series_details.display_top_ngrams(title_text, ngram_count)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}