{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"series = 'A6281'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import os\n",
"import pandas as pd\n",
"import series_details\n",
"import plotly.offline as py\n",
"py.init_notebook_mode()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Read this series' CSV from the data directory; any '/' in the series id becomes '-' in the filename.\n",
"# The start_date and end_date columns are parsed as dates.\n",
"df = pd.read_csv(\n",
"    os.path.join('data', '{}.csv'.format(series.replace('/', '-'))),\n",
"    parse_dates=['start_date', 'end_date'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"
National Archives of Australia: Series A6281
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Long-playing gramophone records relating to the Royal Commission on Espionage, alpha-numeric series
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Total items | 17 |
---|
Access status | |
---|
Open | 11 (64.71%) |
Not yet examined | 5 (29.41%) |
Open with exception | 1 (5.88%) |
Number of items digitised | 0 (0.00%) |
---|
Number of pages digitised | 0 |
---|
Date of earliest content | None |
---|
Date of latest content | nan |
---|
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"series_details.display_summary(series, df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Content preview"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" identifier | \n",
" series | \n",
" control_symbol | \n",
" title | \n",
" contents_dates | \n",
" start_date | \n",
" end_date | \n",
" access_status | \n",
" location | \n",
" digitised_status | \n",
" digitised_pages | \n",
"
\n",
" \n",
" 0 | \n",
" 13187837 | \n",
" A6281 | \n",
" 1042788 | \n",
" Conversation Between Dr Michael Bialoguski, Vladimir Petrov And Ron Richards. Conversation Between Dr Michael Bialoguski And Ron Richards. Conversation Between Dr Bialoguski And Vladimir Petrov. - Primary Version | \n",
" circa1954 - circa1954 | \n",
" NaT | \n",
" NaT | \n",
" Open | \n",
" Various locations | \n",
" False | \n",
" 0 | \n",
"
\n",
" 1 | \n",
" 13187838 | \n",
" A6281 | \n",
" 1042796 | \n",
" Conversation between Dr Michael Bialoguski, Vladimir Petrov and Ron Richards. Conversation between Vladimir Petrov and Ron Richards - Primary Version | \n",
" circa1954 - circa1954 | \n",
" NaT | \n",
" NaT | \n",
" Open | \n",
" Various locations | \n",
" False | \n",
" 0 | \n",
"
\n",
" 2 | \n",
" 13187840 | \n",
" A6281 | \n",
" 1042945 | \n",
" Parliamentary Debate On The Royal Commission On Espionage [House Of Representatives, 25 October 1955 - Rg Menzies - Part 2] - Primary Version | \n",
" circa1955 - circa1955 | \n",
" NaT | \n",
" NaT | \n",
" Open | \n",
" Various locations | \n",
" False | \n",
" 0 | \n",
"
\n",
" 3 | \n",
" 13187841 | \n",
" A6281 | \n",
" 1042955 | \n",
" Parliamentary Debate On The Royal Commission On Espionage [House Of Representatives, 25 October 1955 - Ej Ward - Part 2 And Sm Keon] - Primary Version | \n",
" circa1955 - circa1955 | \n",
" NaT | \n",
" NaT | \n",
" Not yet examined | \n",
" Various locations | \n",
" False | \n",
" 0 | \n",
"
\n",
" 4 | \n",
" 13187854 | \n",
" A6281 | \n",
" 1042808 | \n",
" Conversation between Vladimir Petrov and Ron Richards - Primary Version | \n",
" circa1954 - circa1954 | \n",
" NaT | \n",
" NaT | \n",
" Open | \n",
" Various locations | \n",
" False | \n",
" 0 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Change the number_of_rows value to see more\n",
"number_of_rows = 5\n",
"\n",
"# Preview the first rows: left-align the title column, centre the header cells,\n",
"# and hide the index column via CSS\n",
"(\n",
"    df[:number_of_rows]\n",
"    .style\n",
"    .set_properties(subset=['title'], **{'text-align': 'left'})\n",
"    .set_table_styles([\n",
"        {'selector': 'th', 'props': [('text-align', 'center')]},\n",
"        {'selector': '.row_heading, .blank', 'props': [('display', 'none')]},\n",
"    ])\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## View word frequencies"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Combine all of the file titles into a single lowercase, space-separated string\n",
"# (str.cat with no na_rep omits missing titles from the result)\n",
"title_text = df['title'].str.lower().str.cat(sep=' ')"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" word | \n",
" count | \n",
"
\n",
" \n",
" 8 | \n",
" primary | \n",
" 17 | \n",
"
\n",
" 9 | \n",
" version | \n",
" 17 | \n",
"
\n",
" 0 | \n",
" conversation | \n",
" 11 | \n",
"
\n",
" 19 | \n",
" 1955 | \n",
" 11 | \n",
"
\n",
" 16 | \n",
" representatives | \n",
" 11 | \n",
"
\n",
" 15 | \n",
" house | \n",
" 11 | \n",
"
\n",
" 5 | \n",
" petrov | \n",
" 11 | \n",
"
\n",
" 11 | \n",
" debate | \n",
" 10 | \n",
"
\n",
" 14 | \n",
" espionage | \n",
" 10 | \n",
"
\n",
" 13 | \n",
" commission | \n",
" 10 | \n",
"
\n",
" 12 | \n",
" royal | \n",
" 10 | \n",
"
\n",
" 18 | \n",
" october | \n",
" 10 | \n",
"
\n",
" 10 | \n",
" parliamentary | \n",
" 10 | \n",
"
\n",
" 7 | \n",
" richards | \n",
" 10 | \n",
"
\n",
" 22 | \n",
" part | \n",
" 9 | \n",
"
\n",
" 6 | \n",
" ron | \n",
" 8 | \n",
"
\n",
" 4 | \n",
" vladimir | \n",
" 8 | \n",
"
\n",
" 17 | \n",
" 25 | \n",
" 7 | \n",
"
\n",
" 1 | \n",
" dr | \n",
" 5 | \n",
"
\n",
" 3 | \n",
" bialoguski | \n",
" 5 | \n",
"
\n",
" 23 | \n",
" 2 | \n",
" 5 | \n",
"
\n",
" 2 | \n",
" michael | \n",
" 4 | \n",
"
\n",
" 21 | \n",
" menzies | \n",
" 4 | \n",
"
\n",
" 35 | \n",
" 19 | \n",
" 3 | \n",
"
\n",
" 39 | \n",
" bourke | \n",
" 3 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"series_details.display_word_counts(title_text)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" \n",
" \n",
" | \n",
" ngram | \n",
" count | \n",
"
\n",
" \n",
" 0 | \n",
" primary version | \n",
" 17 | \n",
"
\n",
" 1 | \n",
" of representatives | \n",
" 11 | \n",
"
\n",
" 2 | \n",
" house of | \n",
" 11 | \n",
"
\n",
" 3 | \n",
" conversation between | \n",
" 11 | \n",
"
\n",
" 4 | \n",
" commission on | \n",
" 10 | \n",
"
\n",
" 5 | \n",
" the royal | \n",
" 10 | \n",
"
\n",
" 6 | \n",
" version parliamentary | \n",
" 10 | \n",
"
\n",
" 7 | \n",
" october 1955 | \n",
" 10 | \n",
"
\n",
" 8 | \n",
" on espionage | \n",
" 10 | \n",
"
\n",
" 9 | \n",
" debate on | \n",
" 10 | \n",
"
\n",
" 10 | \n",
" on the | \n",
" 10 | \n",
"
\n",
" 11 | \n",
" royal commission | \n",
" 10 | \n",
"
\n",
" 12 | \n",
" parliamentary debate | \n",
" 10 | \n",
"
\n",
" 13 | \n",
" espionage house | \n",
" 9 | \n",
"
\n",
" 14 | \n",
" vladimir petrov | \n",
" 8 | \n",
"
\n",
" 15 | \n",
" ron richards | \n",
" 8 | \n",
"
\n",
" 16 | \n",
" representatives 25 | \n",
" 7 | \n",
"
\n",
" 17 | \n",
" and ron | \n",
" 6 | \n",
"
\n",
" 18 | \n",
" 25 october | \n",
" 6 | \n",
"
\n",
" 19 | \n",
" part 2 | \n",
" 5 | \n",
"
\n",
" 20 | \n",
" petrov and | \n",
" 5 | \n",
"
\n",
" 21 | \n",
" between dr | \n",
" 5 | \n",
"
\n",
" 22 | \n",
" version conversation | \n",
" 5 | \n",
"
\n",
" 23 | \n",
" michael bialoguski | \n",
" 4 | \n",
"
\n",
" 24 | \n",
" dr michael | \n",
" 4 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Change ngram_count for larger ngrams (trigrams etc)\n",
"ngram_count = 2\n",
"series_details.display_top_ngrams(title_text, ngram_count)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}