{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"series = 'A8703'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import os\n",
"import pandas as pd\n",
"import series_details\n",
"import plotly.offline as py\n",
"py.init_notebook_mode()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(os.path.join('data', '{}.csv'.format(series.replace('/', '-'))), parse_dates=['start_date', 'end_date'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"
National Archives of Australia: Series A8703
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Film and sound recordings, multiple number series
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Total items | 641 |
---|
Access status | |
---|
Open | 328 (51.17%) |
Not yet examined | 313 (48.83%) |
Number of items digitised | 0 (0.00%) |
---|
Number of pages digitised | 0 |
---|
Date of earliest content | 1937 |
---|
Date of latest content | 1980 |
---|
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"series_details.display_summary(series, df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Content preview"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" identifier | \n",
" series | \n",
" control_symbol | \n",
" title | \n",
" contents_dates | \n",
" start_date | \n",
" end_date | \n",
" access_status | \n",
" location | \n",
" digitised_status | \n",
" digitised_pages | \n",
"
\n",
" \n",
" 0 | \n",
" 13167211 | \n",
" A8703 | \n",
" 1002872 | \n",
" Laurie Arons General Secretary Communist Party of Australia [CPA] - Wages claim circa 1960s 2. Gietzelt at Eric Aaron's home Sydney 30/9/1969 3. Communist Party of Australia [CPA] meeting Brisbane attended by Laurie Aarons 10/7/1971 4. Mesyatsev, Deurin and Laurie Aarons at Rushcutters Bay Sydney 6/4/1972 - Primary Version | \n",
" 1969 - 1972 | \n",
" 1969-01-01 00:00:00 | \n",
" 1972-01-01 00:00:00 | \n",
" Open | \n",
" Various locations | \n",
" False | \n",
" 0 | \n",
"
\n",
" 1 | \n",
" 13168111 | \n",
" A8703 | \n",
" 1004943 | \n",
" 7 Days [Episode 24] - Interview with Alec Robertson, Chairman of the Tribune - Primary Version | \n",
" circa1965 - circa1965 | \n",
" NaT | \n",
" NaT | \n",
" Open | \n",
" Various locations | \n",
" False | \n",
" 0 | \n",
"
\n",
" 2 | \n",
" 13168130 | \n",
" A8703 | \n",
" 1004952 | \n",
" 7 Days [Episode 6] - Dead Men on Leave and Episode 7: Disdain to Conceal - Primary Version | \n",
" circa1966 - circa1966 | \n",
" NaT | \n",
" NaT | \n",
" Not yet examined | \n",
" Various locations | \n",
" False | \n",
" 0 | \n",
"
\n",
" 3 | \n",
" 13187503 | \n",
" A8703 | \n",
" 1043564 | \n",
" Anti Japanese Rearmament Delegation to the Prime Minister Canberra - 27 February 1952 - Primary Version | \n",
" 1952 - 1952 | \n",
" 1952-01-01 00:00:00 | \n",
" 1952-01-01 00:00:00 | \n",
" Not yet examined | \n",
" Various locations | \n",
" False | \n",
" 0 | \n",
"
\n",
" 4 | \n",
" 13187998 | \n",
" A8703 | \n",
" 1044544 | \n",
" APCHOL Demonstration Melbourne 12 July 1968 - Primary Version | \n",
" 1968 - 1968 | \n",
" 1968-01-01 00:00:00 | \n",
" 1968-01-01 00:00:00 | \n",
" Open | \n",
" Various locations | \n",
" False | \n",
" 0 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Change the number_of_rows value to see more\n",
"number_of_rows = 5\n",
"\n",
"# Display dataframe \n",
"df[:number_of_rows].style.set_properties(['title'], **{'text-align': 'left'}).set_table_styles([dict(selector=\"th\", props=[(\"text-align\", \"center\")]),\n",
" dict(selector='.row_heading, .blank', props=[('display', 'none')])])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot content dates"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"data": [
{
"name": "Not digitised",
"type": "bar",
"x": [
1937,
1950,
1951,
1952,
1953,
1954,
1955,
1956,
1957,
1958,
1959,
1960,
1961,
1962,
1963,
1964,
1965,
1966,
1967,
1968,
1969,
1970,
1971,
1972,
1973,
1974,
1975,
1977,
1978,
1979,
1980
],
"y": [
64,
1,
7,
7,
8,
2,
16,
19,
23,
14,
12,
15,
35,
47,
16,
20,
9,
9,
13,
18,
12,
42,
14,
11,
9,
5,
3,
2,
1,
1,
1
]
}
],
"layout": {
"barmode": "stack",
"title": "Content dates",
"xaxis": {
"title": "Year"
},
"yaxis": {
"title": "Number of items"
}
}
},
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = series_details.plot_dates(df)\n",
"py.iplot(fig, filename='series-dates-bar')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View word frequencies"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Combine all of the file titles into a single string\n",
"title_text = a = df['title'].str.lower().str.cat(sep=' ')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" word | \n",
" count | \n",
"
\n",
" \n",
" 32 | \n",
" version | \n",
" 629 | \n",
"
\n",
" 31 | \n",
" primary | \n",
" 627 | \n",
"
\n",
" 6 | \n",
" australia | \n",
" 232 | \n",
"
\n",
" 5 | \n",
" party | \n",
" 218 | \n",
"
\n",
" 4 | \n",
" communist | \n",
" 205 | \n",
"
\n",
" 7 | \n",
" cpa | \n",
" 203 | \n",
"
\n",
" 96 | \n",
" conference | \n",
" 170 | \n",
"
\n",
" 17 | \n",
" sydney | \n",
" 108 | \n",
"
\n",
" 60 | \n",
" melbourne | \n",
" 102 | \n",
"
\n",
" 98 | \n",
" state | \n",
" 101 | \n",
"
\n",
" 66 | \n",
" march | \n",
" 92 | \n",
"
\n",
" 20 | \n",
" meeting | \n",
" 86 | \n",
"
\n",
" 115 | \n",
" day | \n",
" 83 | \n",
"
\n",
" 114 | \n",
" may | \n",
" 82 | \n",
"
\n",
" 99 | \n",
" national | \n",
" 71 | \n",
"
\n",
" 21 | \n",
" brisbane | \n",
" 55 | \n",
"
\n",
" 69 | \n",
" 1962 | \n",
" 51 | \n",
"
\n",
" 362 | \n",
" skripov | \n",
" 50 | \n",
"
\n",
" 65 | \n",
" congress | \n",
" 49 | \n",
"
\n",
" 363 | \n",
" case | \n",
" 49 | \n",
"
\n",
" 59 | \n",
" demonstration | \n",
" 46 | \n",
"
\n",
" 101 | \n",
" youth | \n",
" 43 | \n",
"
\n",
" 156 | \n",
" 1961 | \n",
" 40 | \n",
"
\n",
" 123 | \n",
" september | \n",
" 39 | \n",
"
\n",
" 102 | \n",
" league | \n",
" 38 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series_details.display_word_counts(title_text)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" ngram | \n",
" count | \n",
"
\n",
" \n",
" 0 | \n",
" primary version | \n",
" 627 | \n",
"
\n",
" 1 | \n",
" party of | \n",
" 207 | \n",
"
\n",
" 2 | \n",
" of australia | \n",
" 206 | \n",
"
\n",
" 3 | \n",
" communist party | \n",
" 205 | \n",
"
\n",
" 4 | \n",
" australia cpa | \n",
" 202 | \n",
"
\n",
" 5 | \n",
" version communist | \n",
" 123 | \n",
"
\n",
" 6 | \n",
" state conference | \n",
" 92 | \n",
"
\n",
" 7 | \n",
" may day | \n",
" 53 | \n",
"
\n",
" 8 | \n",
" skripov case | \n",
" 46 | \n",
"
\n",
" 9 | \n",
" version may | \n",
" 45 | \n",
"
\n",
" 10 | \n",
" national congress | \n",
" 44 | \n",
"
\n",
" 11 | \n",
" day march | \n",
" 44 | \n",
"
\n",
" 12 | \n",
" version skripov | \n",
" 43 | \n",
"
\n",
" 13 | \n",
" 1962 primary | \n",
" 40 | \n",
"
\n",
" 14 | \n",
" eureka youth | \n",
" 37 | \n",
"
\n",
" 15 | \n",
" youth league | \n",
" 37 | \n",
"
\n",
" 16 | \n",
" 1961 primary | \n",
" 36 | \n",
"
\n",
" 17 | \n",
" league eyl | \n",
" 33 | \n",
"
\n",
" 18 | \n",
" version eureka | \n",
" 27 | \n",
"
\n",
" 19 | \n",
" district conference | \n",
" 25 | \n",
"
\n",
" 20 | \n",
" 1970 primary | \n",
" 25 | \n",
"
\n",
" 21 | \n",
" congress communist | \n",
" 23 | \n",
"
\n",
" 22 | \n",
" 1957 primary | \n",
" 23 | \n",
"
\n",
" 23 | \n",
" 1956 primary | \n",
" 22 | \n",
"
\n",
" 24 | \n",
" 1960 primary | \n",
" 21 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Change ngram_count for larger ngrams (trigrams etc)\n",
"ngram_count = 2\n",
"series_details.display_top_ngrams(title_text, ngram_count)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}