{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "series = 'D2860'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "import pandas as pd\n", "import series_details\n", "import plotly.offline as py\n", "py.init_notebook_mode()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(os.path.join('data', '{}.csv'.format(series.replace('/', '-'))), parse_dates=['start_date', 'end_date'])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "

National Archives of Australia: Series D2860

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

Immigration Restriction Act exemption certificate register

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Total items1
Access status
Open with exception1 (100.00%)
Number of items digitised0 (0.00%)
Number of pages digitised0
Date of earliest content1902
Date of latest content1957

Download the complete CSV file

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "series_details.display_summary(series, df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Content preview" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifierseriescontrol_symboltitlecontents_datesstart_dateend_dateaccess_statuslocationdigitised_statusdigitised_pages
0329730D2860WHOLE SERIESImmigration Restriction Act exemption certificate register1902 - 19571902-01-01 00:00:001957-01-01 00:00:00Open with exceptionMelbourneFalse0
" ], "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Change the number_of_rows value to see more\n", "number_of_rows = 5\n", "\n", "# Display dataframe \n", "df[:number_of_rows].style.set_properties(['title'], **{'text-align': 'left'}).set_table_styles([dict(selector=\"th\", props=[(\"text-align\", \"center\")]),\n", " dict(selector='.row_heading, .blank', props=[('display', 'none')])])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plot content dates" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "name": "Not digitised", "type": "bar", "x": [ 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957 ], "y": [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "barmode": "stack", "title": "Content dates", "xaxis": { "title": "Year" }, "yaxis": { "title": "Number of items" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = series_details.plot_dates(df)\n", "py.iplot(fig, filename='series-dates-bar')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## View word frequencies" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Combine all of the file titles into a single string\n", "title_text = a = df['title'].str.lower().str.cat(sep=' ')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/tim/mycode/naa-data-wap/lib/python3.6/site-packages/pandas/io/formats/style.py:939: RuntimeWarning:\n", "\n", "divide by zero encountered in long_scalars\n", "\n" ] }, { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordcount
0immigration1
1restriction1
2act1
3exemption1
4certificate1
5register1
" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "series_details.display_word_counts(title_text)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/tim/mycode/naa-data-wap/lib/python3.6/site-packages/pandas/io/formats/style.py:939: RuntimeWarning:\n", "\n", "divide by zero encountered in long_scalars\n", "\n" ] }, { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ngramcount
0act exemption1
1certificate register1
2exemption certificate1
3immigration restriction1
4restriction act1
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Change ngram_count for larger ngrams (trigrams etc)\n", "ngram_count = 2\n", "series_details.display_top_ngrams(title_text, ngram_count)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }