{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test dates in a volume\n",
"\n",
"This notebook compares predictions about the dates of individual pages with images of the actual pages. Put simply, it helps you quickly work out where your predictions are going wrong."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import arrow\n",
"from IPython.display import display, HTML"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"series = pd.read_csv('series_list.csv')[:-1]\n",
"all_holidays = pd.read_csv('nsw_holidays_1900_1950.csv')\n",
"#all_holidays.loc[:, 'date'] = pd.to_datetime(all_holidays.loc[:, 'date'], errors='coerce')\n",
"\n",
"def get_holidays(year):\n",
" holidays = all_holidays.loc[all_holidays['year'] == year]['date']\n",
" return holidays.to_list()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"pages_per_vol = {\n",
" '1_134': {\n",
" 'weekday': 5,\n",
" 'saturday': 2\n",
" },\n",
" '135_145': {\n",
" 'weekday': 6,\n",
" 'saturday': 2\n",
" },\n",
" '146_164': {\n",
" 'weekday': 9,\n",
" 'saturday': 3\n",
" },\n",
" '165_190': {\n",
" 'weekday': 6,\n",
" 'saturday': 3\n",
" },\n",
" '191_199': {\n",
" 'weekday': 8,\n",
" 'saturday': 0\n",
" }\n",
"}\n",
"\n",
"def get_pages(vol_num):\n",
" for key, pages in pages_per_vol.items():\n",
" vols = key.split('_')\n",
" vols = [int(y) for y in vols]\n",
" if len(vols) == 2:\n",
" vols = list(range(vols[0], vols[1] + 1))\n",
" if vol_num in vols:\n",
" return pages"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def predict_pages(start_date, end_date, weekday_pages=5, saturday_pages=2, include_saturday=True):\n",
" pages = 0\n",
" year = start_date.year\n",
" holidays = sorted(get_holidays(year))\n",
" for single_date in daterange(start_date, end_date):\n",
" if single_date not in holidays and single_date.weekday() != 6:\n",
" if single_date.weekday() == 5:\n",
" if include_saturday is True:\n",
" pages += saturday_pages\n",
" else:\n",
" pages += weekday_pages\n",
" return pages"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def daterange(start_date, end_date):\n",
" for n in range(int ((end_date - start_date).days)):\n",
" yield start_date.shift(days=+n)\n",
"\n",
"def test_volume(volume, weekday_pages=5, saturday_pages=2, include_saturday=True):\n",
" vol_num = int(volume.split('-')[1])\n",
" volume_details = series.dropna(subset=['Item_number']).loc[series['Item_number'].str.endswith(volume)].iloc[0]\n",
" start_date = arrow.get(volume_details['start_date'], 'YYYY-MM-DD')\n",
" end_date = arrow.get(volume_details['end_date'], 'YYYY-MM-DD').shift(days=+1)\n",
" pages_vol = get_pages(vol_num)\n",
" year = start_date.year\n",
" holidays = get_holidays(year)\n",
" print(holidays)\n",
" page = 1\n",
" for v_date in daterange(start_date, end_date):\n",
" v_date_iso = v_date.format('YYYY-MM-DD')\n",
" print(v_date_iso)\n",
" if v_date_iso not in holidays and v_date.weekday() != 6 and not (v_date.format('YYYY-MM-DD') in missing and missing[v_date.format('YYYY-MM-DD')] == 0):\n",
" if v_date_iso in missing:\n",
" page += missing[v_date_iso]\n",
" elif v_date.weekday() == 5:\n",
" if include_saturday is True:\n",
" page += pages_vol['saturday']\n",
" else:\n",
" page += pages_vol['weekday']\n",
" next_date = v_date.shift(days=+1)\n",
" while next_date.format('YYYY-MM-DD') in holidays or next_date.weekday() == 6 or (next_date.format('YYYY-MM-DD') in missing and missing[next_date.format('YYYY-MM-DD')] == 0):\n",
" next_date = next_date.shift(days=+1)\n",
" print(f'Expected date: {next_date.format(\"D MMMM YYYY\")} / Page N193-{vol_num:03}_{page:04}')\n",
" display(HTML(f''))\n",
" \n",
" # What do we do with duplicates?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The cell below contains information about adjustments that need to be made based on the testing. Once you've found a problem, you record the adjustment and run the test again."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"missing = {\n",
" '1901-01-07': 3,\n",
" '1901-01-18': 4,\n",
" '1901-01-23': 0, # Death of the Queen business abandoned https://trove.nla.gov.au/newspaper/article/14371864/1343690\n",
" '1901-02-25': 4,\n",
" '1901-03-18': 0,\n",
" '1901-03-29': 0, # missing\n",
" '1901-04-04': 3, # No afternoon, day before Easter\n",
" '1901-04-09': 0, # Extra Easter Tuesday\n",
" '1901-04-10': 0, # Extra Easter Wednesday\n",
" '1901-05-27': 0, # Holiday Duke of Cornwall visiting\n",
" '1901-05-28': 0, # Holiday Duke of Cornwall visiting\n",
" '1901-07-03': 0, # Holiday for polling day\n",
" '1901-09-16': 4, # No morning\n",
" '1901-10-10': 4, # 1 Noon\n",
" '1901-10-30': 4, # 1 Noon\n",
" '1901-12-16': 2, # Noon only\n",
" '1902-02-26': 0, # ??\n",
" '1902-04-02': 3, # No afternboon\n",
" '1902-06-26': 0, # ??\n",
" '1902-08-09': 0, #??\n",
" '1902-10-17': 6, # 008_0063 is a duplicate \n",
" '1903-01-06': 4, # 1 afternoon missing\n",
" '1903-01-09': 4, # morning missing\n",
" '1903-04-09': 3, # No afternoon, day before Easter\n",
" '1903-04-14': 0, # Easter Tuesday\n",
" # 1903-09-02 has no morning, but 3 noons\n",
" '1903-09-08': 4, # no morning\n",
" # 1903-09-16 has no morning, but 3 noons\n",
" '1903-10-01': 3, # no afternoon\n",
" '1903-11-18': 3, # no morning, 1 noon -- see 219 and 220!\n",
" '1903-11-30': 7, # 2 sheets from 1903-11-18 inserted\n",
" '1903-12-16': 0, # ??\n",
" '1904-01-20': 3, # no afternoon\n",
" '1904-08-15': 3, # no afternoon\n",
" '1904-11-09': 6, # 016_145 is a duplicate\n",
" '1905-03-02': 6, # 017_213 is a duplicate\n",
" '1905-03-08': 6, # 017_239 is a duplicate\n",
" '1905-04-20': 3, # No afternoon, day before Easter\n",
" '1905-04-25': 0, # Easter Tuesday\n",
" '1905-04-26': 0, # Easter Wednesday\n",
" '1906-03-19': 6, # extra page, 282 is from 1906-03-21\n",
" '1906-03-21': 4, # 1 page included in 1906-03-19\n",
" '1906-04-02': 4, # 1 afternoon missing\n",
" '1906-04-06': 4, # 1 afternoon missing\n",
" '1906-04-09': 4, # 1 afternoon missing\n",
" '1906-04-10': 4, # 1 afternoon missing\n",
" '1906-04-11': 4, # 1 afternoon missing\n",
" '1906-04-12': 3, # No afternoon, day before Easter\n",
" '1906-04-17': 0, # Easter Tuesday\n",
" '1906-04-18': 0, # Easter Wednesday\n",
" '1906-04-25': 4, # 1 afternoon missing\n",
" '1906-05-02': 4, # 1 afternoon missing\n",
" '1906-05-03': 4, # 1 afternoon missing\n",
" '1906-07-12': 4, # 1 afternoon missing\n",
" '1906-07-16': 4, # 1 afternoon missing\n",
" '1906-10-25': 3, # Afternoon missing\n",
" '1907-02-02': 1, # Saturday 1 page only\n",
" '1907-03-08': 4, # 1 afternoon missing\n",
" '1907-04-29': 4, # 1 afternoon missing\n",
" '1907-06-27': 2, # 2 pages only marked '11 o'clock'\n",
" '1907-09-10': 3, # No afternoon\n",
" '1907-10-11': 4, # 1 afternoon missing\n",
" '1907-11-29': 4, # 1 afternoon missing\n",
" '1907-12-02': 4, # 1 afternoon missing\n",
" '1908-03-12': 4, # 1 afternoon missing\n",
" '1908-04-16': 3, # No afternoon, day before Easter\n",
" '1908-04-21': 0, # Easter Tuesday\n",
" '1908-08-20': 0, # American Fleet visit!\n",
" '1908-08-21': 3, # No morning?\n",
" '1908-08-24': 0, # American Fleet visit!\n",
" '1908-11-14': 1, # Saturday 1 page only\n",
" '1929-03-01': 4,\n",
" '1929-03-12': 3,\n",
" '1929-03-27': 3,\n",
" '1930-02-26': 3,\n",
" '1930-04-17': 3,\n",
" '1930-04-22': 0,\n",
" '1930-04-23': 0,\n",
" '1930-04-24': 0,\n",
" '1930-04-26': 0,\n",
" '1930-05-09': 3,\n",
" '1930-12-23': 3\n",
"}\n",
"\n",
"duplicates = [\n",
" '008_0063',\n",
" '016_145',\n",
" '017_213',\n",
" '119_265'\n",
"]\n",
"\n",
"backwards = [\n",
" '120'\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Test a volume!\n",
"\n",
"Ignore the final row, it's not an error."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['1937-01-01', '1937-02-01', '1937-03-26', '1937-03-27', '1937-03-29', '1937-04-26', '1937-05-12', '1937-08-02', '1937-10-04', '1937-12-25', '1937-12-27']\n",
"1937-04-01\n",
"Expected date: 2 April 1937 / Page N193-146_0010\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-02\n",
"Expected date: 3 April 1937 / Page N193-146_0019\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-03\n",
"Expected date: 5 April 1937 / Page N193-146_0022\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-04\n",
"1937-04-05\n",
"Expected date: 6 April 1937 / Page N193-146_0031\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-06\n",
"Expected date: 7 April 1937 / Page N193-146_0040\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-07\n",
"Expected date: 8 April 1937 / Page N193-146_0049\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-08\n",
"Expected date: 9 April 1937 / Page N193-146_0058\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-09\n",
"Expected date: 10 April 1937 / Page N193-146_0067\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-10\n",
"Expected date: 12 April 1937 / Page N193-146_0070\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-11\n",
"1937-04-12\n",
"Expected date: 13 April 1937 / Page N193-146_0079\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-13\n",
"Expected date: 14 April 1937 / Page N193-146_0088\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-14\n",
"Expected date: 15 April 1937 / Page N193-146_0097\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-15\n",
"Expected date: 16 April 1937 / Page N193-146_0106\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-16\n",
"Expected date: 17 April 1937 / Page N193-146_0115\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-17\n",
"Expected date: 19 April 1937 / Page N193-146_0118\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-18\n",
"1937-04-19\n",
"Expected date: 20 April 1937 / Page N193-146_0127\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-20\n",
"Expected date: 21 April 1937 / Page N193-146_0136\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-21\n",
"Expected date: 22 April 1937 / Page N193-146_0145\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-22\n",
"Expected date: 23 April 1937 / Page N193-146_0154\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-23\n",
"Expected date: 24 April 1937 / Page N193-146_0163\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-24\n",
"Expected date: 27 April 1937 / Page N193-146_0166\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-25\n",
"1937-04-26\n",
"1937-04-27\n",
"Expected date: 28 April 1937 / Page N193-146_0175\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-28\n",
"Expected date: 29 April 1937 / Page N193-146_0184\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-29\n",
"Expected date: 30 April 1937 / Page N193-146_0193\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-04-30\n",
"Expected date: 1 May 1937 / Page N193-146_0202\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-01\n",
"Expected date: 3 May 1937 / Page N193-146_0205\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-02\n",
"1937-05-03\n",
"Expected date: 4 May 1937 / Page N193-146_0214\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-04\n",
"Expected date: 5 May 1937 / Page N193-146_0223\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-05\n",
"Expected date: 6 May 1937 / Page N193-146_0232\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-06\n",
"Expected date: 7 May 1937 / Page N193-146_0241\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-07\n",
"Expected date: 8 May 1937 / Page N193-146_0250\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-08\n",
"Expected date: 10 May 1937 / Page N193-146_0253\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-09\n",
"1937-05-10\n",
"Expected date: 11 May 1937 / Page N193-146_0262\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-11\n",
"Expected date: 13 May 1937 / Page N193-146_0271\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-12\n",
"1937-05-13\n",
"Expected date: 14 May 1937 / Page N193-146_0280\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-14\n",
"Expected date: 15 May 1937 / Page N193-146_0289\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-15\n",
"Expected date: 17 May 1937 / Page N193-146_0292\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-16\n",
"1937-05-17\n",
"Expected date: 18 May 1937 / Page N193-146_0301\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-18\n",
"Expected date: 19 May 1937 / Page N193-146_0310\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-19\n",
"Expected date: 20 May 1937 / Page N193-146_0319\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-20\n",
"Expected date: 21 May 1937 / Page N193-146_0328\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-21\n",
"Expected date: 22 May 1937 / Page N193-146_0337\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-22\n",
"Expected date: 24 May 1937 / Page N193-146_0340\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-23\n",
"1937-05-24\n",
"Expected date: 25 May 1937 / Page N193-146_0349\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-25\n",
"Expected date: 26 May 1937 / Page N193-146_0358\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-26\n",
"Expected date: 27 May 1937 / Page N193-146_0367\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-27\n",
"Expected date: 28 May 1937 / Page N193-146_0376\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-28\n",
"Expected date: 29 May 1937 / Page N193-146_0385\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-29\n",
"Expected date: 31 May 1937 / Page N193-146_0388\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-05-30\n",
"1937-05-31\n",
"Expected date: 1 June 1937 / Page N193-146_0397\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-01\n",
"Expected date: 2 June 1937 / Page N193-146_0406\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-02\n",
"Expected date: 3 June 1937 / Page N193-146_0415\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-03\n",
"Expected date: 4 June 1937 / Page N193-146_0424\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-04\n",
"Expected date: 5 June 1937 / Page N193-146_0433\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-05\n",
"Expected date: 7 June 1937 / Page N193-146_0436\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-06\n",
"1937-06-07\n",
"Expected date: 8 June 1937 / Page N193-146_0445\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-08\n",
"Expected date: 9 June 1937 / Page N193-146_0454\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-09\n",
"Expected date: 10 June 1937 / Page N193-146_0463\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-10\n",
"Expected date: 11 June 1937 / Page N193-146_0472\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-11\n",
"Expected date: 12 June 1937 / Page N193-146_0481\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-12\n",
"Expected date: 14 June 1937 / Page N193-146_0484\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-13\n",
"1937-06-14\n",
"Expected date: 15 June 1937 / Page N193-146_0493\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-15\n",
"Expected date: 16 June 1937 / Page N193-146_0502\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-16\n",
"Expected date: 17 June 1937 / Page N193-146_0511\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-17\n",
"Expected date: 18 June 1937 / Page N193-146_0520\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-18\n",
"Expected date: 19 June 1937 / Page N193-146_0529\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-19\n",
"Expected date: 21 June 1937 / Page N193-146_0532\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-20\n",
"1937-06-21\n",
"Expected date: 22 June 1937 / Page N193-146_0541\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-22\n",
"Expected date: 23 June 1937 / Page N193-146_0550\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-23\n",
"Expected date: 24 June 1937 / Page N193-146_0559\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-24\n",
"Expected date: 25 June 1937 / Page N193-146_0568\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-25\n",
"Expected date: 26 June 1937 / Page N193-146_0577\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-26\n",
"Expected date: 28 June 1937 / Page N193-146_0580\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-27\n",
"1937-06-28\n",
"Expected date: 29 June 1937 / Page N193-146_0589\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-29\n",
"Expected date: 30 June 1937 / Page N193-146_0598\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"1937-06-30\n",
"Expected date: 1 July 1937 / Page N193-146_0607\n"
]
},
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"test_volume('N193-146')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Rename some of the files to make them standard"
]
},
{
"cell_type": "code",
"execution_count": 215,
"metadata": {},
"outputs": [],
"source": [
"# Rename vol 14\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"img_dir = Path('all_headers/AU NBAC N193-014')\n",
"images = img_dir.glob('*.jpg')\n",
"for image in images:\n",
" parts = image.name.split('-')\n",
" new_name = Path(f'all_headers/AU NBAC N193-014/{parts[0]}-0{parts[1]}-{parts[2]}')\n",
" image.rename(new_name)\n"
]
},
{
"cell_type": "code",
"execution_count": 240,
"metadata": {},
"outputs": [],
"source": [
"# Rename vol 18\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"img_dir = Path('all_headers/AU NBAC N193-018')\n",
"images = img_dir.glob('*.jpg')\n",
"for image in images:\n",
" new_name = Path(f'all_headers/AU NBAC N193-018/{image.name.replace(\".-\", \"-\")}')\n",
" image.rename(new_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}