{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"series = 'P437'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import os\n",
"import pandas as pd\n",
"import series_details\n",
"import plotly.offline as py\n",
"py.init_notebook_mode()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(os.path.join('data', '{}.csv'.format(series.replace('/', '-'))), parse_dates=['start_date', 'end_date'])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"
National Archives of Australia: Series P437
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Correspondence Files, Annual Single Number Series
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| Total items | 4,958 |
|---|
| Access status | |
|---|
| Open | 4,945 (99.74%) |
| Open with exception | 10 (0.20%) |
| Not yet examined | 2 (0.04%) |
| Closed | 1 (0.02%) |
| Number of items digitised | 18 (0.36%) |
|---|
| Number of pages digitised | 442 |
|---|
| Date of earliest content | 1901 |
|---|
| Date of latest content | 1940 |
|---|
Download the complete CSV file
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"series_details.display_summary(series, df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Content preview"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" identifier | \n",
" series | \n",
" control_symbol | \n",
" title | \n",
" contents_dates | \n",
" start_date | \n",
" end_date | \n",
" access_status | \n",
" location | \n",
" digitised_status | \n",
" digitised_pages | \n",
"
\n",
" \n",
" | 0 | \n",
" 538211 | \n",
" P437 | \n",
" 1940/279 | \n",
" Restricted drugs | \n",
" 1939 - 1940 | \n",
" 1939-01-01 00:00:00 | \n",
" 1940-01-01 00:00:00 | \n",
" Closed | \n",
" Hobart | \n",
" False | \n",
" 0 | \n",
"
\n",
" | 1 | \n",
" 542152 | \n",
" P437 | \n",
" WHOLE SERIES | \n",
" Correspondence files of the Collector of Customs, Hobart for the period 1908 to 1940; covers such topics as tariffs, trade, duty, immigration, export permits, patents lighthouse service, ship wrecks, passports,various grant and Bounty schemes | \n",
" 1908 - 1940 | \n",
" 1908-01-01 00:00:00 | \n",
" 1940-01-01 00:00:00 | \n",
" Open with exception | \n",
" Hobart | \n",
" False | \n",
" 0 | \n",
"
\n",
" | 2 | \n",
" 635923 | \n",
" P437 | \n",
" 1910/12 | \n",
" Accounts - postage paid - Board of Trade Journals | \n",
" 1909 - 1910 | \n",
" 1909-01-01 00:00:00 | \n",
" 1910-01-01 00:00:00 | \n",
" Open | \n",
" Hobart | \n",
" False | \n",
" 0 | \n",
"
\n",
" | 3 | \n",
" 642439 | \n",
" P437 | \n",
" 1910/14 | \n",
" Bank guarantees - cancellation of - The Commercial Bank of Tasmania Ltd. | \n",
" 1910 - 1910 | \n",
" 1910-01-01 00:00:00 | \n",
" 1910-01-01 00:00:00 | \n",
" Open | \n",
" Hobart | \n",
" False | \n",
" 0 | \n",
"
\n",
" | 4 | \n",
" 642442 | \n",
" P437 | \n",
" 1910/15 | \n",
" Imports of Fire Arms to Tasmania - January to June 1909. Returns. | \n",
" 1910 - 1910 | \n",
" 1910-01-01 00:00:00 | \n",
" 1910-01-01 00:00:00 | \n",
" Open | \n",
" Hobart | \n",
" False | \n",
" 0 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Change the number_of_rows value to see more\n",
"number_of_rows = 5\n",
"\n",
"# Display dataframe \n",
"df[:number_of_rows].style.set_properties(['title'], **{'text-align': 'left'}).set_table_styles([dict(selector=\"th\", props=[(\"text-align\", \"center\")]),\n",
" dict(selector='.row_heading, .blank', props=[('display', 'none')])])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot content dates"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"data": [
{
"name": "Digitised",
"type": "bar",
"x": [
1910,
1911,
1912,
1913,
1914,
1915,
1921,
1922,
1923,
1925,
1926
],
"y": [
2,
2,
1,
1,
3,
2,
1,
2,
1,
3,
2
]
},
{
"name": "Not digitised",
"type": "bar",
"x": [
1901,
1902,
1903,
1904,
1905,
1906,
1907,
1908,
1909,
1910,
1911,
1912,
1913,
1914,
1915,
1916,
1917,
1918,
1919,
1920,
1921,
1922,
1923,
1924,
1925,
1926,
1927,
1928,
1929,
1930,
1931,
1932,
1933,
1934,
1935,
1936,
1937,
1938,
1939,
1940
],
"y": [
1,
1,
1,
3,
3,
4,
5,
5,
16,
277,
67,
563,
1183,
857,
321,
185,
6,
12,
236,
327,
264,
323,
347,
318,
195,
49,
7,
3,
4,
3,
3,
3,
3,
3,
3,
3,
3,
3,
4,
2
]
}
],
"layout": {
"barmode": "stack",
"title": "Content dates",
"xaxis": {
"title": "Year"
},
"yaxis": {
"title": "Number of items"
}
}
},
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = series_details.plot_dates(df)\n",
"py.iplot(fig, filename='series-dates-bar')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View word frequencies"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Combine all of the file titles into a single string\n",
"title_text = a = df['title'].str.lower().str.cat(sep=' ')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" word | \n",
" count | \n",
"
\n",
" \n",
" | 85 | \n",
" request | \n",
" 508 | \n",
"
\n",
" | 14 | \n",
" duty | \n",
" 382 | \n",
"
\n",
" | 5 | \n",
" customs | \n",
" 345 | \n",
"
\n",
" | 252 | \n",
" act | \n",
" 238 | \n",
"
\n",
" | 107 | \n",
" return | \n",
" 213 | \n",
"
\n",
" | 37 | \n",
" tasmania | \n",
" 200 | \n",
"
\n",
" | 127 | \n",
" goods | \n",
" 196 | \n",
"
\n",
" | 39 | \n",
" imports | \n",
" 186 | \n",
"
\n",
" | 219 | \n",
" mr | \n",
" 177 | \n",
"
\n",
" | 16 | \n",
" export | \n",
" 161 | \n",
"
\n",
" | 61 | \n",
" application | \n",
" 155 | \n",
"
\n",
" | 200 | \n",
" company | \n",
" 149 | \n",
"
\n",
" | 332 | \n",
" ss | \n",
" 135 | \n",
"
\n",
" | 6 | \n",
" hobart | \n",
" 134 | \n",
"
\n",
" | 146 | \n",
" import | \n",
" 128 | \n",
"
\n",
" | 89 | \n",
" forms | \n",
" 127 | \n",
"
\n",
" | 125 | \n",
" list | \n",
" 121 | \n",
"
\n",
" | 195 | \n",
" forwarded | \n",
" 118 | \n",
"
\n",
" | 100 | \n",
" certificate | \n",
" 117 | \n",
"
\n",
" | 292 | \n",
" launceston | \n",
" 114 | \n",
"
\n",
" | 141 | \n",
" imported | \n",
" 102 | \n",
"
\n",
" | 302 | \n",
" beer | \n",
" 102 | \n",
"
\n",
" | 189 | \n",
" officers | \n",
" 97 | \n",
"
\n",
" | 212 | \n",
" invoice | \n",
" 95 | \n",
"
\n",
" | 108 | \n",
" commonwealth | \n",
" 94 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series_details.display_word_counts(title_text)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" ngram | \n",
" count | \n",
"
\n",
" \n",
" | 0 | \n",
" request for | \n",
" 354 | \n",
"
\n",
" | 1 | \n",
" to be | \n",
" 121 | \n",
"
\n",
" | 2 | \n",
" application for | \n",
" 111 | \n",
"
\n",
" | 3 | \n",
" of customs | \n",
" 88 | \n",
"
\n",
" | 4 | \n",
" duty on | \n",
" 83 | \n",
"
\n",
" | 5 | \n",
" of duty | \n",
" 82 | \n",
"
\n",
" | 6 | \n",
" certificate of | \n",
" 82 | \n",
"
\n",
" | 7 | \n",
" of the | \n",
" 76 | \n",
"
\n",
" | 8 | \n",
" for the | \n",
" 72 | \n",
"
\n",
" | 9 | \n",
" return of | \n",
" 68 | \n",
"
\n",
" | 10 | \n",
" and company | \n",
" 66 | \n",
"
\n",
" | 11 | \n",
" export of | \n",
" 64 | \n",
"
\n",
" | 12 | \n",
" with the | \n",
" 62 | \n",
"
\n",
" | 13 | \n",
" commerce act | \n",
" 61 | \n",
"
\n",
" | 14 | \n",
" import of | \n",
" 61 | \n",
"
\n",
" | 15 | \n",
" collector of | \n",
" 59 | \n",
"
\n",
" | 16 | \n",
" list of | \n",
" 59 | \n",
"
\n",
" | 17 | \n",
" return showing | \n",
" 57 | \n",
"
\n",
" | 18 | \n",
" imports of | \n",
" 56 | \n",
"
\n",
" | 19 | \n",
" being forwarded | \n",
" 55 | \n",
"
\n",
" | 20 | \n",
" trading with | \n",
" 55 | \n",
"
\n",
" | 21 | \n",
" of exemption | \n",
" 54 | \n",
"
\n",
" | 22 | \n",
" the enemy | \n",
" 53 | \n",
"
\n",
" | 23 | \n",
" importation of | \n",
" 51 | \n",
"
\n",
" | 24 | \n",
" of goods | \n",
" 49 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Change ngram_count for larger ngrams (trigrams etc)\n",
"ngram_count = 2\n",
"series_details.display_top_ngrams(title_text, ngram_count)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}