{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "series = 'A6122'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "import pandas as pd\n", "import series_details\n", "import plotly.offline as py\n", "py.init_notebook_mode()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(os.path.join('data', '{}.csv'.format(series.replace('/', '-'))), parse_dates=['start_date', 'end_date'])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "

National Archives of Australia: Series A6122

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "

Subject files, multiple number series

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Total items2,819
Access status
Open with exception2,376 (84.29%)
Open162 (5.75%)
Closed138 (4.90%)
Not yet examined137 (4.86%)
Withheld pending agency advice6 (0.21%)
Number of items digitised565 (20.04%)
Number of pages digitised69,007
Date of earliest content1800
Date of latest content1993
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "series_details.display_summary(series, df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Content preview" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
identifierseriescontrol_symboltitlecontents_datesstart_dateend_dateaccess_statuslocationdigitised_statusdigitised_pages
0217109A6122124Comintern [Communist International] [includes papers on The League against Cruelties and Oppression in the Colonies] File ends 1966.1927 - 19661927-01-01 00:00:001966-01-01 00:00:00Open with exceptionCanberraFalse0
1217110A6122125Left Book Club1938 - 19411938-01-01 00:00:001941-01-01 00:00:00OpenCanberraFalse0
2217111A6122126Industrial Workers of the World Volume 1 Part 11916 - 19481916-01-01 00:00:001948-01-01 00:00:00Open with exceptionCanberraTrue253
3217113A6122127Textile Workers Militant Committee1944 - 19441944-01-01 00:00:001944-01-01 00:00:00OpenCanberraFalse0
4217114A6122128Irish Republican Army1940 - 19441940-01-01 00:00:001944-01-01 00:00:00OpenCanberraTrue63
" ], "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Change the number_of_rows value to see more\n", "number_of_rows = 5\n", "\n", "# Display dataframe \n", "df[:number_of_rows].style.set_properties(['title'], **{'text-align': 'left'}).set_table_styles([dict(selector=\"th\", props=[(\"text-align\", \"center\")]),\n", " dict(selector='.row_heading, .blank', props=[('display', 'none')])])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Plot content dates" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "name": "Digitised", "type": "bar", "x": [ 1800, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989 ], "y": [ 1, 2, 2, 5, 6, 5, 5, 6, 7, 5, 5, 5, 6, 8, 8, 8, 10, 11, 11, 12, 25, 27, 28, 31, 39, 43, 51, 53, 63, 66, 74, 82, 85, 85, 102, 98, 107, 114, 131, 125, 111, 118, 111, 126, 105, 104, 108, 95, 87, 83, 84, 88, 82, 69, 77, 77, 77, 85, 64, 48, 38, 44, 42, 48, 40, 33, 28, 22, 12, 10, 9, 6, 4, 4, 4 ] }, { "name": "Not digitised", "type": "bar", "x": [ 1800, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1909, 1910, 1911, 1912, 1913, 1914, 1915, 1916, 1917, 1918, 1919, 1920, 1921, 1922, 1923, 1924, 1925, 1926, 1927, 1928, 1929, 1930, 1931, 1932, 1933, 1934, 1935, 1936, 1937, 1938, 1939, 1940, 1941, 1942, 1943, 1944, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970, 1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993 ], "y": [ 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 10, 10, 10, 10, 11, 11, 11, 12, 11, 13, 16, 17, 17, 19, 21, 23, 25, 45, 50, 53, 55, 76, 102, 118, 127, 159, 176, 181, 201, 218, 240, 422, 415, 493, 607, 724, 740, 677, 482, 404, 390, 389, 359, 362, 338, 296, 251, 242, 181, 183, 179, 173, 172, 142, 111, 98, 86, 92, 94, 72, 53, 43, 42, 36, 33, 33, 23, 20, 15, 15, 9, 9, 7, 5, 3, 3 ] } ], "layout": { "barmode": "stack", "title": "Content dates", "xaxis": { "title": "Year" }, "yaxis": { "title": "Number of items" } } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = series_details.plot_dates(df)\n", "py.iplot(fig, filename='series-dates-bar')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## View word frequencies" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Combine all of the file titles into a single string\n", "title_text = a = df['title'].str.lower().str.cat(sep=' ')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordcount
42australia1,339
18volume1,277
45party1,078
1communist1,061
191456
44cpa418
102australian359
279branch334
36south308
642278
35new275
226nsw231
204interest222
202victoria195
37wales182
46queensland167
1933155
114asio153
317associations146
268cp131
3394117
34council114
39union107
47association106
43general104
" ], "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "series_details.display_word_counts(title_text)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ngramcount
0communist party1,019
1of australia1,016
2party of973
3volume 1398
4cpa communist250
5volume 2244
6interest in211
7new south182
8south wales182
9of a160
10a communist145
11volume 3141
12cp of131
13branch communist118
14volume 4109
15associations individual95
16australia interest88
17south australia87
18australia new83
19australia queensland79
20australia nsw77
21of the76
22western australia76
23australia victoria72
24nsw volume71
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Change ngram_count for larger ngrams (trigrams etc)\n", "ngram_count = 2\n", "series_details.display_top_ngrams(title_text, ngram_count)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }