{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Add dates to the file list\n", "\n", "Combine the list of files on CloudStor with the item list from the Archives catalogue so that we can easily tell which files belong to which year." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Harvested from CloudStor\n", "files_df = pd.read_csv('files.csv')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
directorynamepath
0AU NBAC N193-001/N193-001_0001.tifShared/ANU-Library/Sydney Stock Exchange 1901-...
1AU NBAC N193-001/N193-001_0002.tifShared/ANU-Library/Sydney Stock Exchange 1901-...
2AU NBAC N193-001/N193-001_0003.tifShared/ANU-Library/Sydney Stock Exchange 1901-...
3AU NBAC N193-001/N193-001_0004.tifShared/ANU-Library/Sydney Stock Exchange 1901-...
4AU NBAC N193-001/N193-001_0005.tifShared/ANU-Library/Sydney Stock Exchange 1901-...
\n", "
" ], "text/plain": [ " directory name \\\n", "0 AU NBAC N193-001/ N193-001_0001.tif \n", "1 AU NBAC N193-001/ N193-001_0002.tif \n", "2 AU NBAC N193-001/ N193-001_0003.tif \n", "3 AU NBAC N193-001/ N193-001_0004.tif \n", "4 AU NBAC N193-001/ N193-001_0005.tif \n", "\n", " path \n", "0 Shared/ANU-Library/Sydney Stock Exchange 1901-... \n", "1 Shared/ANU-Library/Sydney Stock Exchange 1901-... \n", "2 Shared/ANU-Library/Sydney Stock Exchange 1901-... \n", "3 Shared/ANU-Library/Sydney Stock Exchange 1901-... \n", "4 Shared/ANU-Library/Sydney Stock Exchange 1901-... " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files_df.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# From the catalogue\n", "items_df = pd.read_csv('itemList.csv')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
referenceCodetitledatesstartDateaccessConditionslocations
0N193-1Sydney Stock Exchange Stock and Share Lists1901-01-01 - 1901-03-011901-01-01NaNNaN
1N193-2Sydney Stock Exchange Stock and Share Lists1901-04-01 - 1901-06-011901-04-01NaNNaN
2N193-3Sydney Stock Exchange Stock and Share Lists1901-07-01 - 1901-09-011901-07-01NaNNaN
3N193-4Sydney Stock Exchange Stock and Share Lists1901-10-01 - 1901-12-011901-10-01NaNNaN
4N193-5Sydney Stock Exchange Stock and Share Lists1902-01-01 - 1902-03-011902-01-01NaNNaN
5N193-6Sydney Stock Exchange Stock and Share Lists1902-04-01 - 1902-06-011902-04-01NaNNaN
6N193-7Sydney Stock Exchange Stock and Share Lists1902-07-01 - 1902-09-011902-07-01NaNNaN
7N193-8Sydney Stock Exchange Stock and Share Lists1902-10-01 - 1902-12-011902-10-01NaNNaN
8N193-9Sydney Stock Exchange Stock and Share Lists1903-01-01 - 1903-03-011903-01-01NaNNaN
9N193-10Sydney Stock Exchange Stock and Share Lists1903-04-01 - 1903-06-011903-04-01NaNNaN
10N193-11Sydney Stock Exchange Stock and Share Lists1903-07-01 - 1903-09-011903-07-01NaNNaN
11N193-12Sydney Stock Exchange Stock and Share Lists1903-10-01 - 1903-12-011903-10-01NaNNaN
12N193-13Sydney Stock Exchange Stock and Share Lists1904-01-01 - 1904-03-011904-01-01NaNNaN
13N193-14Sydney Stock Exchange Stock and Share Lists1904-04-01 - 1904-06-011904-04-01NaNNaN
14N193-15Sydney Stock Exchange Stock and Share Lists1904-07-01 - 1904-09-011904-07-01NaNNaN
15N193-16Sydney Stock Exchange Stock and Share Lists1904-10-01 - 1904-12-011904-10-01NaNNaN
16N193-17Sydney Stock Exchange Stock and Share Lists1905-01-01 - 1905-03-011905-01-01NaNNaN
17N193-18Sydney Stock Exchange Stock and Share Lists1905-04-01 - 1905-06-011905-04-01NaNNaN
18N193-19Sydney Stock Exchange Stock and Share Lists1905-07-01 - 1905-09-011905-07-01NaNNaN
19N193-20Sydney Stock Exchange Stock and Share Lists1905-10-01 - 1905-12-011905-10-01NaNNaN
20N193-21Sydney Stock Exchange Stock and Share Lists1906-01-01 - 1906-03-011906-01-01NaNNaN
21N193-22Sydney Stock Exchange Stock and Share Lists1906-04-01 - 1906-06-011906-04-01NaNNaN
22N193-23Sydney Stock Exchange Stock and Share Lists1906-07-01 - 1906-09-011906-07-01NaNNaN
23N193-24Sydney Stock Exchange Stock and Share Lists1906-10-01 - 1906-12-011906-10-01NaNNaN
24N193-25Sydney Stock Exchange Stock and Share Lists1907-01-01 - 1907-03-011907-01-01NaNNaN
25N193-26Sydney Stock Exchange Stock and Share Lists1907-04-01 - 1907-06-011907-04-01NaNNaN
26N193-27Sydney Stock Exchange Stock and Share Lists1907-07-01 - 1907-09-011907-07-01NaNNaN
27N193-28Sydney Stock Exchange Stock and Share Lists1907-10-01 - 1907-12-011907-10-01NaNNaN
28N193-29Sydney Stock Exchange Stock and Share Lists1908-01-01 - 1908-03-011908-01-01NaNNaN
29N193-30Sydney Stock Exchange Stock and Share Lists1908-04-01 - 1908-06-011908-04-01NaNNaN
.....................
169N193-170Sydney Stock Exchange Stock and Share Lists1943-04-01 - 1943-06-011943-04-01NaNNaN
170N193-171Sydney Stock Exchange Stock and Share Lists1943-07-01 - 1943-09-011943-07-01NaNNaN
171N193-172Sydney Stock Exchange Stock and Share Lists1943-10-01 - 1943-12-011943-10-01NaNNaN
172N193-173Sydney Stock Exchange Stock and Share Lists1944-01-01 - 1944-03-011944-01-01NaNNaN
173N193-174Sydney Stock Exchange Stock and Share Lists1944-04-01 - 1944-06-011944-04-01NaNNaN
174N193-175Sydney Stock Exchange Stock and Share Lists1944-07-01 - 1944-09-011944-07-01NaNNaN
175N193-176Sydney Stock Exchange Stock and Share Lists1944-10-01 - 1944-12-011944-10-01NaNNaN
176N193-177Sydney Stock Exchange Stock and Share Lists1945-01-01 - 1945-03-011945-01-01NaNNaN
177N193-178Sydney Stock Exchange Stock and Share Lists1945-04-01 - 1945-06-011945-04-01NaNNaN
178N193-179Sydney Stock Exchange Stock and Share Lists1945-07-01 - 1945-09-011945-07-01NaNNaN
179N193-180Sydney Stock Exchange Stock and Share Lists1945-10-01 - 1945-12-011945-10-01NaNNaN
180N193-181Sydney Stock Exchange Stock and Share Lists1946-01-01 - 1946-03-011946-01-01NaNNaN
181N193-182Sydney Stock Exchange Stock and Share Lists1946-04-01 - 1946-06-011946-04-01NaNNaN
182N193-183Sydney Stock Exchange Stock and Share Lists1946-07-01 - 1946-09-011946-07-01NaNNaN
183N193-184Sydney Stock Exchange Stock and Share Lists1946-10-01 - 1946-12-011946-10-01NaNNaN
184N193-185Sydney Stock Exchange Stock and Share Lists1947-01-01 - 1947-03-011947-01-01NaNNaN
185N193-186Sydney Stock Exchange Stock and Share Lists1947-04-01 - 1947-06-011947-04-01NaNNaN
186N193-187Sydney Stock Exchange Stock and Share Lists1947-07-01 - 1947-09-011947-07-01NaNNaN
187N193-188Sydney Stock Exchange Stock and Share Lists1947-10-01 - 1947-12-011947-10-01NaNNaN
188N193-189Sydney Stock Exchange Stock and Share Lists1948-01-01 - 1948-03-011948-01-01NaNNaN
189N193-190Sydney Stock Exchange Stock and Share Lists1948-04-01 - 1948-06-011948-04-01NaNNaN
190N193-191Sydney Stock Exchange Stock and Share Lists1948-07-01 - 1948-09-011948-07-01NaNNaN
191N193-192Sydney Stock Exchange Stock and Share Lists1948-10-01 - 1948-12-011948-10-01NaNNaN
192N193-193Sydney Stock Exchange Stock and Share Lists1949-01-01 - 1949-03-011949-01-01NaNNaN
193N193-194Sydney Stock Exchange Stock and Share Lists1949-04-01 - 1949-06-011949-04-01NaNNaN
194N193-195Sydney Stock Exchange Stock and Share Lists1949-07-01 - 1949-09-011949-07-01NaNNaN
195N193-196Sydney Stock Exchange Stock and Share Lists1949-10-01 - 1949-12-011949-10-01NaNNaN
196N193-197Sydney Stock Exchange Stock and Share Lists1950-01-01 - 1950-03-011950-01-01NaNNaN
197N193-198Sydney Stock Exchange Stock and Share Lists1950-04-01 - 1950-06-011950-04-01NaNNaN
198N193-199Sydney Stock Exchange Stock Official List of P...1950-03-01 - 1950-07-011950-03-01NaNNaN
\n", "

199 rows × 6 columns

\n", "
" ], "text/plain": [ " referenceCode title \\\n", "0 N193-1 Sydney Stock Exchange Stock and Share Lists \n", "1 N193-2 Sydney Stock Exchange Stock and Share Lists \n", "2 N193-3 Sydney Stock Exchange Stock and Share Lists \n", "3 N193-4 Sydney Stock Exchange Stock and Share Lists \n", "4 N193-5 Sydney Stock Exchange Stock and Share Lists \n", "5 N193-6 Sydney Stock Exchange Stock and Share Lists \n", "6 N193-7 Sydney Stock Exchange Stock and Share Lists \n", "7 N193-8 Sydney Stock Exchange Stock and Share Lists \n", "8 N193-9 Sydney Stock Exchange Stock and Share Lists \n", "9 N193-10 Sydney Stock Exchange Stock and Share Lists \n", "10 N193-11 Sydney Stock Exchange Stock and Share Lists \n", "11 N193-12 Sydney Stock Exchange Stock and Share Lists \n", "12 N193-13 Sydney Stock Exchange Stock and Share Lists \n", "13 N193-14 Sydney Stock Exchange Stock and Share Lists \n", "14 N193-15 Sydney Stock Exchange Stock and Share Lists \n", "15 N193-16 Sydney Stock Exchange Stock and Share Lists \n", "16 N193-17 Sydney Stock Exchange Stock and Share Lists \n", "17 N193-18 Sydney Stock Exchange Stock and Share Lists \n", "18 N193-19 Sydney Stock Exchange Stock and Share Lists \n", "19 N193-20 Sydney Stock Exchange Stock and Share Lists \n", "20 N193-21 Sydney Stock Exchange Stock and Share Lists \n", "21 N193-22 Sydney Stock Exchange Stock and Share Lists \n", "22 N193-23 Sydney Stock Exchange Stock and Share Lists \n", "23 N193-24 Sydney Stock Exchange Stock and Share Lists \n", "24 N193-25 Sydney Stock Exchange Stock and Share Lists \n", "25 N193-26 Sydney Stock Exchange Stock and Share Lists \n", "26 N193-27 Sydney Stock Exchange Stock and Share Lists \n", "27 N193-28 Sydney Stock Exchange Stock and Share Lists \n", "28 N193-29 Sydney Stock Exchange Stock and Share Lists \n", "29 N193-30 Sydney Stock Exchange Stock and Share Lists \n", ".. ... ... 
\n", "169 N193-170 Sydney Stock Exchange Stock and Share Lists \n", "170 N193-171 Sydney Stock Exchange Stock and Share Lists \n", "171 N193-172 Sydney Stock Exchange Stock and Share Lists \n", "172 N193-173 Sydney Stock Exchange Stock and Share Lists \n", "173 N193-174 Sydney Stock Exchange Stock and Share Lists \n", "174 N193-175 Sydney Stock Exchange Stock and Share Lists \n", "175 N193-176 Sydney Stock Exchange Stock and Share Lists \n", "176 N193-177 Sydney Stock Exchange Stock and Share Lists \n", "177 N193-178 Sydney Stock Exchange Stock and Share Lists \n", "178 N193-179 Sydney Stock Exchange Stock and Share Lists \n", "179 N193-180 Sydney Stock Exchange Stock and Share Lists \n", "180 N193-181 Sydney Stock Exchange Stock and Share Lists \n", "181 N193-182 Sydney Stock Exchange Stock and Share Lists \n", "182 N193-183 Sydney Stock Exchange Stock and Share Lists \n", "183 N193-184 Sydney Stock Exchange Stock and Share Lists \n", "184 N193-185 Sydney Stock Exchange Stock and Share Lists \n", "185 N193-186 Sydney Stock Exchange Stock and Share Lists \n", "186 N193-187 Sydney Stock Exchange Stock and Share Lists \n", "187 N193-188 Sydney Stock Exchange Stock and Share Lists \n", "188 N193-189 Sydney Stock Exchange Stock and Share Lists \n", "189 N193-190 Sydney Stock Exchange Stock and Share Lists \n", "190 N193-191 Sydney Stock Exchange Stock and Share Lists \n", "191 N193-192 Sydney Stock Exchange Stock and Share Lists \n", "192 N193-193 Sydney Stock Exchange Stock and Share Lists \n", "193 N193-194 Sydney Stock Exchange Stock and Share Lists \n", "194 N193-195 Sydney Stock Exchange Stock and Share Lists \n", "195 N193-196 Sydney Stock Exchange Stock and Share Lists \n", "196 N193-197 Sydney Stock Exchange Stock and Share Lists \n", "197 N193-198 Sydney Stock Exchange Stock and Share Lists \n", "198 N193-199 Sydney Stock Exchange Stock Official List of P... 
\n", "\n", " dates startDate accessConditions locations \n", "0 1901-01-01 - 1901-03-01 1901-01-01 NaN NaN \n", "1 1901-04-01 - 1901-06-01 1901-04-01 NaN NaN \n", "2 1901-07-01 - 1901-09-01 1901-07-01 NaN NaN \n", "3 1901-10-01 - 1901-12-01 1901-10-01 NaN NaN \n", "4 1902-01-01 - 1902-03-01 1902-01-01 NaN NaN \n", "5 1902-04-01 - 1902-06-01 1902-04-01 NaN NaN \n", "6 1902-07-01 - 1902-09-01 1902-07-01 NaN NaN \n", "7 1902-10-01 - 1902-12-01 1902-10-01 NaN NaN \n", "8 1903-01-01 - 1903-03-01 1903-01-01 NaN NaN \n", "9 1903-04-01 - 1903-06-01 1903-04-01 NaN NaN \n", "10 1903-07-01 - 1903-09-01 1903-07-01 NaN NaN \n", "11 1903-10-01 - 1903-12-01 1903-10-01 NaN NaN \n", "12 1904-01-01 - 1904-03-01 1904-01-01 NaN NaN \n", "13 1904-04-01 - 1904-06-01 1904-04-01 NaN NaN \n", "14 1904-07-01 - 1904-09-01 1904-07-01 NaN NaN \n", "15 1904-10-01 - 1904-12-01 1904-10-01 NaN NaN \n", "16 1905-01-01 - 1905-03-01 1905-01-01 NaN NaN \n", "17 1905-04-01 - 1905-06-01 1905-04-01 NaN NaN \n", "18 1905-07-01 - 1905-09-01 1905-07-01 NaN NaN \n", "19 1905-10-01 - 1905-12-01 1905-10-01 NaN NaN \n", "20 1906-01-01 - 1906-03-01 1906-01-01 NaN NaN \n", "21 1906-04-01 - 1906-06-01 1906-04-01 NaN NaN \n", "22 1906-07-01 - 1906-09-01 1906-07-01 NaN NaN \n", "23 1906-10-01 - 1906-12-01 1906-10-01 NaN NaN \n", "24 1907-01-01 - 1907-03-01 1907-01-01 NaN NaN \n", "25 1907-04-01 - 1907-06-01 1907-04-01 NaN NaN \n", "26 1907-07-01 - 1907-09-01 1907-07-01 NaN NaN \n", "27 1907-10-01 - 1907-12-01 1907-10-01 NaN NaN \n", "28 1908-01-01 - 1908-03-01 1908-01-01 NaN NaN \n", "29 1908-04-01 - 1908-06-01 1908-04-01 NaN NaN \n", ".. ... ... ... ... 
\n", "169 1943-04-01 - 1943-06-01 1943-04-01 NaN NaN \n", "170 1943-07-01 - 1943-09-01 1943-07-01 NaN NaN \n", "171 1943-10-01 - 1943-12-01 1943-10-01 NaN NaN \n", "172 1944-01-01 - 1944-03-01 1944-01-01 NaN NaN \n", "173 1944-04-01 - 1944-06-01 1944-04-01 NaN NaN \n", "174 1944-07-01 - 1944-09-01 1944-07-01 NaN NaN \n", "175 1944-10-01 - 1944-12-01 1944-10-01 NaN NaN \n", "176 1945-01-01 - 1945-03-01 1945-01-01 NaN NaN \n", "177 1945-04-01 - 1945-06-01 1945-04-01 NaN NaN \n", "178 1945-07-01 - 1945-09-01 1945-07-01 NaN NaN \n", "179 1945-10-01 - 1945-12-01 1945-10-01 NaN NaN \n", "180 1946-01-01 - 1946-03-01 1946-01-01 NaN NaN \n", "181 1946-04-01 - 1946-06-01 1946-04-01 NaN NaN \n", "182 1946-07-01 - 1946-09-01 1946-07-01 NaN NaN \n", "183 1946-10-01 - 1946-12-01 1946-10-01 NaN NaN \n", "184 1947-01-01 - 1947-03-01 1947-01-01 NaN NaN \n", "185 1947-04-01 - 1947-06-01 1947-04-01 NaN NaN \n", "186 1947-07-01 - 1947-09-01 1947-07-01 NaN NaN \n", "187 1947-10-01 - 1947-12-01 1947-10-01 NaN NaN \n", "188 1948-01-01 - 1948-03-01 1948-01-01 NaN NaN \n", "189 1948-04-01 - 1948-06-01 1948-04-01 NaN NaN \n", "190 1948-07-01 - 1948-09-01 1948-07-01 NaN NaN \n", "191 1948-10-01 - 1948-12-01 1948-10-01 NaN NaN \n", "192 1949-01-01 - 1949-03-01 1949-01-01 NaN NaN \n", "193 1949-04-01 - 1949-06-01 1949-04-01 NaN NaN \n", "194 1949-07-01 - 1949-09-01 1949-07-01 NaN NaN \n", "195 1949-10-01 - 1949-12-01 1949-10-01 NaN NaN \n", "196 1950-01-01 - 1950-03-01 1950-01-01 NaN NaN \n", "197 1950-04-01 - 1950-06-01 1950-04-01 NaN NaN \n", "198 1950-03-01 - 1950-07-01 1950-03-01 NaN NaN \n", "\n", "[199 rows x 6 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "items_df" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# Reference codes in the Archives item list are not zero padded.\n", "# Need to zero pad them so we can use them to link with the file list.\n", "def pad_ids(ref_id):\n", " 
collection, item = ref_id.split('-')\n", " padded = item.zfill(3)\n", " return '{}-{}'.format(collection, padded)\n", "\n", "items_df['referenceCode'] = items_df['referenceCode'].apply(pad_ids)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
referenceCodetitledatesstartDateaccessConditionslocations
0N193-001Sydney Stock Exchange Stock and Share Lists1901-01-01 - 1901-03-011901-01-01NaNNaN
1N193-002Sydney Stock Exchange Stock and Share Lists1901-04-01 - 1901-06-011901-04-01NaNNaN
2N193-003Sydney Stock Exchange Stock and Share Lists1901-07-01 - 1901-09-011901-07-01NaNNaN
3N193-004Sydney Stock Exchange Stock and Share Lists1901-10-01 - 1901-12-011901-10-01NaNNaN
4N193-005Sydney Stock Exchange Stock and Share Lists1902-01-01 - 1902-03-011902-01-01NaNNaN
\n", "
" ], "text/plain": [ " referenceCode title \\\n", "0 N193-001 Sydney Stock Exchange Stock and Share Lists \n", "1 N193-002 Sydney Stock Exchange Stock and Share Lists \n", "2 N193-003 Sydney Stock Exchange Stock and Share Lists \n", "3 N193-004 Sydney Stock Exchange Stock and Share Lists \n", "4 N193-005 Sydney Stock Exchange Stock and Share Lists \n", "\n", " dates startDate accessConditions locations \n", "0 1901-01-01 - 1901-03-01 1901-01-01 NaN NaN \n", "1 1901-04-01 - 1901-06-01 1901-04-01 NaN NaN \n", "2 1901-07-01 - 1901-09-01 1901-07-01 NaN NaN \n", "3 1901-10-01 - 1901-12-01 1901-10-01 NaN NaN \n", "4 1902-01-01 - 1902-03-01 1902-01-01 NaN NaN " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Now nicely zero padded\n", "items_df.head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# There's a startDate column in the item list, but not a separate endDate.\n", "# Let's extract the endDate from the dates string and create a new column for it.\n", "items_df['endDate'] = items_df['dates'].str.slice(13)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Extract year from the startDate and create a new column for it.\n", "items_df['year'] = items_df['startDate'].str.slice(0,4)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "dates_df = items_df[['referenceCode', 'startDate', 'endDate', 'year']]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# Extract the referenceCode from the directory in the file list and create a new column.\n", "files_df['referenceCode'] = files_df['directory'].str.slice(8,16)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(72932, 4)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "files_df.shape" ] 
}, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Ok, now we can use the 'referenceCode' column to link and merge both lists.\n", "new_df = pd.merge(files_df, dates_df, on='referenceCode')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
directorynamepathreferenceCodestartDateendDateyear
0AU NBAC N193-001/N193-001_0001.tifShared/ANU-Library/Sydney Stock Exchange 1901-...N193-0011901-01-011901-03-011901
1AU NBAC N193-001/N193-001_0002.tifShared/ANU-Library/Sydney Stock Exchange 1901-...N193-0011901-01-011901-03-011901
2AU NBAC N193-001/N193-001_0003.tifShared/ANU-Library/Sydney Stock Exchange 1901-...N193-0011901-01-011901-03-011901
3AU NBAC N193-001/N193-001_0004.tifShared/ANU-Library/Sydney Stock Exchange 1901-...N193-0011901-01-011901-03-011901
4AU NBAC N193-001/N193-001_0005.tifShared/ANU-Library/Sydney Stock Exchange 1901-...N193-0011901-01-011901-03-011901
\n", "
" ], "text/plain": [ " directory name \\\n", "0 AU NBAC N193-001/ N193-001_0001.tif \n", "1 AU NBAC N193-001/ N193-001_0002.tif \n", "2 AU NBAC N193-001/ N193-001_0003.tif \n", "3 AU NBAC N193-001/ N193-001_0004.tif \n", "4 AU NBAC N193-001/ N193-001_0005.tif \n", "\n", " path referenceCode \\\n", "0 Shared/ANU-Library/Sydney Stock Exchange 1901-... N193-001 \n", "1 Shared/ANU-Library/Sydney Stock Exchange 1901-... N193-001 \n", "2 Shared/ANU-Library/Sydney Stock Exchange 1901-... N193-001 \n", "3 Shared/ANU-Library/Sydney Stock Exchange 1901-... N193-001 \n", "4 Shared/ANU-Library/Sydney Stock Exchange 1901-... N193-001 \n", "\n", " startDate endDate year \n", "0 1901-01-01 1901-03-01 1901 \n", "1 1901-01-01 1901-03-01 1901 \n", "2 1901-01-01 1901-03-01 1901 \n", "3 1901-01-01 1901-03-01 1901 \n", "4 1901-01-01 1901-03-01 1901 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# So now each individual image file has a year!\n", "new_df.head()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "# Save as a CSV file\n", "new_df.to_csv('files_with_dates.csv', index=False)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1941 2397\n", "1940 2374\n", "1939 2338\n", "1938 2318\n", "1937 2149\n", "1949 1937\n", "1942 1690\n", "1948 1681\n", "1943 1645\n", "1944 1640\n", "1947 1601\n", "1945 1589\n", "1936 1538\n", "1935 1530\n", "1946 1437\n", "1934 1368\n", "1904 1321\n", "1909 1319\n", "1907 1318\n", "1903 1314\n", "1905 1313\n", "1912 1312\n", "1908 1310\n", "1920 1309\n", "1902 1306\n", "1911 1304\n", "1928 1303\n", "1931 1301\n", "1913 1299\n", "1932 1299\n", "1930 1298\n", "1906 1298\n", "1910 1298\n", "1933 1297\n", "1929 1296\n", "1926 1295\n", "1924 1294\n", "1927 1293\n", "1919 1289\n", "1915 1287\n", "1922 1287\n", "1925 1286\n", "1923 1284\n", "1918 1283\n", "1916 1283\n", "1917 1280\n", "1921 1276\n", "1901 
1272\n", "1914 1123\n", "1950 1053\n", "Name: year, dtype: int64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_df['year'].value_counts()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }