{ "cells": [ { "cell_type": "code", "execution_count": 84, "id": "accessible-lender", "metadata": {}, "outputs": [], "source": [ "import io\n", "import json\n", "import sqlite3\n", "import zipfile\n", "from pathlib import Path\n", "\n", "import markdown2\n", "import pandas as pd\n", "import requests_cache\n", "from requests.adapters import HTTPAdapter\n", "from requests.packages.urllib3.util.retry import Retry\n", "from slugify import slugify\n", "from sqlite_utils import Database\n", "\n", "s = requests_cache.CachedSession()\n", "retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])\n", "s.mount(\"https://\", HTTPAdapter(max_retries=retries))\n", "s.mount(\"http://\", HTTPAdapter(max_retries=retries))" ] }, { "cell_type": "markdown", "id": "914151c9-7133-419c-ae8c-8bf7376663dc", "metadata": {}, "source": [ "## Save local copies of all CSV datasets" ] }, { "cell_type": "code", "execution_count": 184, "id": "graphic-phone", "metadata": {}, "outputs": [], "source": [ "df_csvs = pd.read_csv(\"glam-datasets-from-gov-portals-csvs.csv\")" ] }, { "cell_type": "code", "execution_count": 185, "id": "loved-kennedy", "metadata": {}, "outputs": [], "source": [ "df_csvs.fillna(\"\", inplace=True)" ] }, { "cell_type": "code", "execution_count": 186, "id": "pressing-cannon", "metadata": {}, "outputs": [], "source": [ "df_csvs[\"file_index\"] = df_csvs.apply(\n", " lambda x: f'{slugify(x[\"publisher\"])}-{slugify(x[\"file_title\"])}-{slugify(x[\"file_created\"][:10])}',\n", " axis=1,\n", ")" ] }, { "cell_type": "code", "execution_count": 106, "id": "suited-behavior", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PROV Digitisation Program statistics\n", "PROV Workforce Data 19-20\n", "PROV Annual Report - Records Issued & Visitor Statistics 2015-2016\n" ] } ], "source": [ "def read_csv(url, header=0, encoding=0):\n", " \"\"\"\n", " Loop through some encoding/parsing options to see if we can get the CSV to open 
properly.\n", " \"\"\"\n", " encodings = [\"ISO-8859-1\", \"latin-1\"]\n", " headers = [None]\n", " try:\n", " if encoding > 0 and header > 0:\n", " df = pd.read_csv(\n", " url,\n", " sep=None,\n", " engine=\"python\",\n", " na_values=[\"-\", \" \"],\n", " encoding=encodings[encoding - 1],\n", " header=headers[header - 1],\n", " )\n", " elif encoding > 0:\n", " df = pd.read_csv(\n", " url,\n", " sep=None,\n", " engine=\"python\",\n", " na_values=[\"-\", \" \"],\n", " encoding=encodings[encoding - 1],\n", " )\n", " elif header > 0:\n", " df = pd.read_csv(\n", " url,\n", " sep=None,\n", " engine=\"python\",\n", " na_values=[\"-\", \" \"],\n", " header=headers[header - 1],\n", " )\n", " else:\n", " df = pd.read_csv(url, sep=None, engine=\"python\", na_values=[\"-\", \" \"])\n", " except UnicodeDecodeError:\n", " if encoding == len(encodings):\n", " raise\n", " else:\n", " return read_csv(url=url, header=header, encoding=encoding + 1)\n", " except pd.errors.ParserError:\n", " if header == len(headers):\n", " raise\n", " else:\n", " return read_csv(url=url, header=header + 1, encoding=encoding)\n", " else:\n", " return df\n", "\n", "\n", "for i, csv in enumerate(df_csvs.itertuples()):\n", " # print(csv.dataset_title)\n", " try:\n", " response = s.get(csv.download_url)\n", " response.raise_for_status\n", " except:\n", " print(csv.dataset_title)\n", " with Path(\"csvs\", f\"{csv.file_index}.csv\").open(\"w\") as csv_file:\n", " csv_file.write(response.text)" ] }, { "cell_type": "markdown", "id": "4f7025bc-bd5c-4232-a441-e1fc8d23c7e7", "metadata": {}, "source": [ "## Create a list of datasets for index checking" ] }, { "cell_type": "code", "execution_count": 189, "id": "boxed-broadway", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "No details -- history-trust-of-sa-suffrage-petition\n", "Error -- queensland-state-archives-corporate-school-files-works-facilities-works-establishment-files-1871-1998-2018-02-23\n", "Error -- 
state-library-of-south-australia-fire-insurance-maps-1911-1914-2014-06-22\n", "No details -- public-records-office-victoria-outwards-passengers-from-victoria-1852-1915-2014-08-01\n", "No details -- history-trust-of-sa-passengers-in-history\n", "Error -- south-australian-museum-consultants-2017-18-2019-08-15\n", "Error -- nsw-state-archives-railway-employment-records-2014-09-30\n", "Error -- state-library-of-south-australia-19th-century-photographs-by-ernest-gall-2014-06-10\n", "Error -- queensland-museum-queensland-museum-collection-of-ethnographic-object-records-2014-06-25\n", "Error -- state-library-of-south-australia-bradman-collection-2013-11-18\n", "Error -- nsw-state-archives-nsw-govt-railways-and-tramways-roll-of-honour-1914-1919-csv-2014-09-30\n", "Error -- queensland-museum-queensland-museum-collection-of-historical-object-records-2014-06-25\n", "Error -- state-library-of-south-australia-election-leaflets-2013-05-08\n", "Error -- nsw-state-archives-nominal-roll-of-the-first-railway-section-aif-csv-2014-09-30\n", "Error -- state-library-of-queensland-nasla-music-csv-2013-05-29\n", "No details -- public-records-office-victoria-british-assisted-passengers-to-victoria-1839-1871-2014-08-01\n", "Error -- libraries-tasmania-archives-series-csv-2016-04-06\n", "Error -- south-australian-museum-workplace-health-and-safety-2017-18-2019-08-15\n", "Error -- state-library-of-south-australia-australia-1-63360-military-survey-s-a-1914-1958-2014-06-22\n", "Error -- state-library-of-south-australia-19th-century-photographs-by-townsend-duryea-2014-06-10\n", "Error -- public-records-office-victoria-reading-room-visitors-2014-12-23\n", "Error -- mount-gambier-library-commercial-street-traders-2014-06-10\n", "Error -- state-library-of-south-australia-election-posters-2013-05-08\n", "Error -- south-australian-museum-contractors-2017-18-2019-08-15\n", "No details -- public-records-office-victoria-unassisted-inward-passengers-1852-1923-2020-10-27\n" ] } ], "source": [ "# This will 
throw errors where the contents aren't CSV files\n", "# Delete these\n",
 "detail_columns = [\"publisher\", \"info_url\", \"file_title\", \"file_modified\"]\n",
 "dfs = []\n",
 "# sorted() keeps the row order and the printed warnings reproducible between runs\n",
 "for csv_path in sorted(Path(\"csvs\").glob(\"*.csv\")):\n",
 "    # .stem removes only the final .csv suffix\n",
 "    file_index = csv_path.stem\n",
 "    matches = df_csvs.loc[df_csvs[\"file_index\"] == file_index, detail_columns]\n",
 "    if matches.empty:\n",
 "        # A local file with no matching row in the harvested dataset list\n",
 "        print(f\"No details -- {file_index}\")\n",
 "        continue\n",
 "    details = matches.iloc[0].to_dict()\n",
 "    details[\"csv_file\"] = csv_path.name\n",
 "    try:\n",
 "        df_csv = pd.read_csv(csv_path, low_memory=False)\n",
 "    except Exception:\n",
 "        # Best-effort: report files pandas cannot read and move on. Unlike a bare\n",
 "        # except, this still lets KeyboardInterrupt stop the loop.\n",
 "        print(f\"Error -- {file_index}\")\n",
 "        continue\n",
 "    # Pipe-separated column names, for manual review of which columns to index\n",
 "    details[\"columns\"] = \"|\".join(df_csv.columns)\n",
 "    dfs.append(details)\n",
 "df = pd.DataFrame(dfs)" ] }, { "cell_type": "code", "execution_count": 190, "id": "1e32fe36-2031-4217-b3cd-3ae77071d13d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dataset_titlepublisherauthordataset_issueddataset_modifieddataset_descriptionsourceinfo_urlstart_dateend_date...download_urlformatfile_descriptionfile_createdfile_modifiedfile_sizelicencefile_indexcsv_filecolumns
0State Library of Queensland - Real estate mapsState Library of Queenslandopendata@slq.qld.gov.au2012-12-07T06:05:16.6403022020-12-09T05:55:15.871780A unique collection of original maps and plans...data.qld.gov.auhttps://data.qld.gov.au/dataset/959d611f-a9cf-......https://www.data.qld.gov.au/dataset/959d611f-a...CSVThis updated dataset includes links to 798 dig...2018-02-28T04:50:33.1275162019-08-19T06:18:57.312772252416Creative Commons Attribution 4.0state-library-of-queensland-real-estate-maps-f...state-library-of-queensland-real-estate-maps-f...Title|Description|Lat|Lon|Link|ID
1Passport registers 1926 to 1939Queensland State Archivesweb@archives.qld.gov.au2013-10-14T06:10:08.4092292022-06-20T23:00:36.801163These indexes were compiled from the passport ...data.qld.gov.auhttps://data.qld.gov.au/dataset/fc87f25a-dc02-......https://www.data.qld.gov.au/dataset/fc87f25a-d...CSVThis open data file lists the names of immigra...2017-01-11T23:47:35.4494652022-01-10T04:53:27.8279802831155Creative Commons Attribution 4.0queensland-state-archives-passport-clearances-...queensland-state-archives-passport-clearances-...Last name|Given names|Notes|Date of arrival|Ye...
2Assisted immigration 1848 to 1912Queensland State Archivesweb@archives.qld.gov.au2013-03-04T06:34:34.2700232022-06-20T12:57:24.964249These indexes were created from the [Registers...data.qld.gov.auhttps://data.qld.gov.au/dataset/ba182873-e8a7-......https://www.data.qld.gov.au/dataset/ba182873-e...CSVThis open data file lists the names of assiste...2013-03-05T23:30:57.3085462022-06-14T07:46:06.2344342621440Creative Commons Attribution 4.0queensland-state-archives-assisted-immigration...queensland-state-archives-assisted-immigration...Last name|Given names|Notes|Age|Ship|Date|Year...
3Australian South Sea Islanders 1867 to 1908Queensland State Archivesweb@archives.qld.gov.au2014-06-25T04:29:57.4385962022-06-20T13:07:35.777233This index was compiled from a wide variety of...data.qld.gov.auhttps://data.qld.gov.au/dataset/eae0afa9-681c-......https://www.data.qld.gov.au/dataset/eae0afa9-6...CSVThis open data file lists the names (L-Z) of A...2017-01-11T01:32:27.7479552017-01-11T01:32:27.55653513107200Creative Commons Attribution 4.0queensland-state-archives-australian-south-sea...queensland-state-archives-australian-south-sea...Last name|Given name/s|Page|Date|Ref|Prev sys ...
4Queensland Museum collection of protozoan spec...Queensland Museumopendata@qm.qld.gov.au2014-02-18T23:18:45.1020732019-07-10T16:42:34.524484A list of specimens of protozoan species in Qu...data.qld.gov.auhttps://data.qld.gov.au/dataset/4f1071f2-f4fa-......http://www.qm.qld.gov.au/microsites/data/proto...CSVA CSV file containing records of all protozoan...2014-02-18T23:19:05.3316562017-06-23T00:00:0041733324Creative Commons Attribution 4.0queensland-museum-queensland-museum-protozoan-...queensland-museum-queensland-museum-protozoan-...dcterms:type|dcterms:modified|dcterms:language...
..................................................................
723SA FOI – number of fee waiver or reduction by ...State Records South AustraliaState Records2018-02-02T04:38:06.7526082018-12-23T01:22:00.6300162017-18 annual reporting data on the number of...data.sa.gov.auhttps://data.sa.gov.au/data/dataset/f923f9b0-b...2012-07-012018-06-30...https://data.sa.gov.au/data/dataset/f923f9b0-b...CSV2017-18 annual reporting data on the number of...2018-02-02T15:38:27.9302642018-12-23T01:22:00.608148Creative Commons Attributionstate-records-south-australia-sa-foi-number-of...state-records-south-australia-sa-foi-number-of...Reason for fee waiver, by sector|No. of waiver...
724State Library of Queensland - Catalogue searchesState Library of Queenslandopendata@slq.qld.gov.au2012-12-07T05:55:14.5021232021-03-08T07:42:00.611055This open data file contains the text strings ...data.qld.gov.auhttps://data.qld.gov.au/dataset/cebb997c-1c42-......https://www.data.qld.gov.au/dataset/cebb997c-1...CSVThe text strings searched and count of recurri...2019-06-18T06:37:31.0109642019-08-27T01:13:09.674578104448Creative Commons Attribution 4.0state-library-of-queensland-july-2017-catalogu...state-library-of-queensland-july-2017-catalogu...Search strings|Count
725World War I Soldiers and Nurses (1914-1928).Libraries TasmaniaLibraries Tasmania2015-06-15T03:04:09.0561762021-11-23T14:36:42.489452Photographs, articles and applications for lan...data.gov.auhttps://data.gov.au/dataset/b711231a-2a02-48eb...19141928...https://data.gov.au/data/dataset/b711231a-2a02...CSV2016-03-22T10:01:58.5396072021-11-232835528Creative Commons Attribution 4.0 Internationallibraries-tasmania-world-war-one-tasmanian-pho...libraries-tasmania-world-war-one-tasmanian-pho...DIGITAL_OBJECT - URL_TEXT|DIGITAL_OBJECT - URL...
726Deceased Estate Files, 1880-1923NSW State ArchivesState Records Authority2014-09-30T04:52:48.8059722016-07-20T12:09:20.785878Researching deceased estates files before 1923...data.nsw.gov.auhttps://data.nsw.gov.au/data/dataset/5d45437c-......https://data.nsw.gov.au/data/dataset/5d45437c-...CSVThis dataset contains the following attributes...2014-09-30T00:55:53.313012Creative Commons Attributionnsw-state-archives-deceased-estates-2014-09-30nsw-state-archives-deceased-estates-2014-09-30...Surname|FirstName|Locality|DateOfDeath|DateDut...
727SA MemoryState Library of South AustraliaState Library of South Australia2013-03-07T16:15:35.2280852019-08-29T02:29:51.427322A selected and wide range of digitised archiva...data.sa.gov.auhttps://data.sa.gov.au/data/dataset/7cd90f98-1...1836-2010...https://data.sa.gov.au/data/dataset/7cd90f98-1...CSVA selected and wide range of digitised archiva...2013-05-31T01:01:00.4692712019-08-28T23:40:58.4002201495812Creative Commons Attributionstate-library-of-south-australia-sa-memory-201...state-library-of-south-australia-sa-memory-201...id|TITLE|CREATOR|INNOPAC|LINK|coverage_place|C...
\n", "

728 rows × 21 columns

\n", "
" ], "text/plain": [ " dataset_title \\\n", "0 State Library of Queensland - Real estate maps \n", "1 Passport registers 1926 to 1939 \n", "2 Assisted immigration 1848 to 1912 \n", "3 Australian South Sea Islanders 1867 to 1908 \n", "4 Queensland Museum collection of protozoan spec... \n", ".. ... \n", "723 SA FOI – number of fee waiver or reduction by ... \n", "724 State Library of Queensland - Catalogue searches \n", "725 World War I Soldiers and Nurses (1914-1928). \n", "726 Deceased Estate Files, 1880-1923 \n", "727 SA Memory \n", "\n", " publisher author \\\n", "0 State Library of Queensland opendata@slq.qld.gov.au \n", "1 Queensland State Archives web@archives.qld.gov.au \n", "2 Queensland State Archives web@archives.qld.gov.au \n", "3 Queensland State Archives web@archives.qld.gov.au \n", "4 Queensland Museum opendata@qm.qld.gov.au \n", ".. ... ... \n", "723 State Records South Australia State Records \n", "724 State Library of Queensland opendata@slq.qld.gov.au \n", "725 Libraries Tasmania Libraries Tasmania \n", "726 NSW State Archives State Records Authority \n", "727 State Library of South Australia State Library of South Australia \n", "\n", " dataset_issued dataset_modified \\\n", "0 2012-12-07T06:05:16.640302 2020-12-09T05:55:15.871780 \n", "1 2013-10-14T06:10:08.409229 2022-06-20T23:00:36.801163 \n", "2 2013-03-04T06:34:34.270023 2022-06-20T12:57:24.964249 \n", "3 2014-06-25T04:29:57.438596 2022-06-20T13:07:35.777233 \n", "4 2014-02-18T23:18:45.102073 2019-07-10T16:42:34.524484 \n", ".. ... ... \n", "723 2018-02-02T04:38:06.752608 2018-12-23T01:22:00.630016 \n", "724 2012-12-07T05:55:14.502123 2021-03-08T07:42:00.611055 \n", "725 2015-06-15T03:04:09.056176 2021-11-23T14:36:42.489452 \n", "726 2014-09-30T04:52:48.805972 2016-07-20T12:09:20.785878 \n", "727 2013-03-07T16:15:35.228085 2019-08-29T02:29:51.427322 \n", "\n", " dataset_description source \\\n", "0 A unique collection of original maps and plans... 
data.qld.gov.au \n", "1 These indexes were compiled from the passport ... data.qld.gov.au \n", "2 These indexes were created from the [Registers... data.qld.gov.au \n", "3 This index was compiled from a wide variety of... data.qld.gov.au \n", "4 A list of specimens of protozoan species in Qu... data.qld.gov.au \n", ".. ... ... \n", "723 2017-18 annual reporting data on the number of... data.sa.gov.au \n", "724 This open data file contains the text strings ... data.qld.gov.au \n", "725 Photographs, articles and applications for lan... data.gov.au \n", "726 Researching deceased estates files before 1923... data.nsw.gov.au \n", "727 A selected and wide range of digitised archiva... data.sa.gov.au \n", "\n", " info_url start_date \\\n", "0 https://data.qld.gov.au/dataset/959d611f-a9cf-... \n", "1 https://data.qld.gov.au/dataset/fc87f25a-dc02-... \n", "2 https://data.qld.gov.au/dataset/ba182873-e8a7-... \n", "3 https://data.qld.gov.au/dataset/eae0afa9-681c-... \n", "4 https://data.qld.gov.au/dataset/4f1071f2-f4fa-... \n", ".. ... ... \n", "723 https://data.sa.gov.au/data/dataset/f923f9b0-b... 2012-07-01 \n", "724 https://data.qld.gov.au/dataset/cebb997c-1c42-... \n", "725 https://data.gov.au/dataset/b711231a-2a02-48eb... 1914 \n", "726 https://data.nsw.gov.au/data/dataset/5d45437c-... \n", "727 https://data.sa.gov.au/data/dataset/7cd90f98-1... 1836-2010 \n", "\n", " end_date ... download_url \\\n", "0 ... https://www.data.qld.gov.au/dataset/959d611f-a... \n", "1 ... https://www.data.qld.gov.au/dataset/fc87f25a-d... \n", "2 ... https://www.data.qld.gov.au/dataset/ba182873-e... \n", "3 ... https://www.data.qld.gov.au/dataset/eae0afa9-6... \n", "4 ... http://www.qm.qld.gov.au/microsites/data/proto... \n", ".. ... ... ... \n", "723 2018-06-30 ... https://data.sa.gov.au/data/dataset/f923f9b0-b... \n", "724 ... https://www.data.qld.gov.au/dataset/cebb997c-1... \n", "725 1928 ... https://data.gov.au/data/dataset/b711231a-2a02... \n", "726 ... 
https://data.nsw.gov.au/data/dataset/5d45437c-... \n", "727 ... https://data.sa.gov.au/data/dataset/7cd90f98-1... \n", "\n", " format file_description \\\n", "0 CSV This updated dataset includes links to 798 dig... \n", "1 CSV This open data file lists the names of immigra... \n", "2 CSV This open data file lists the names of assiste... \n", "3 CSV This open data file lists the names (L-Z) of A... \n", "4 CSV A CSV file containing records of all protozoan... \n", ".. ... ... \n", "723 CSV 2017-18 annual reporting data on the number of... \n", "724 CSV The text strings searched and count of recurri... \n", "725 CSV \n", "726 CSV This dataset contains the following attributes... \n", "727 CSV A selected and wide range of digitised archiva... \n", "\n", " file_created file_modified file_size \\\n", "0 2018-02-28T04:50:33.127516 2019-08-19T06:18:57.312772 252416 \n", "1 2017-01-11T23:47:35.449465 2022-01-10T04:53:27.827980 2831155 \n", "2 2013-03-05T23:30:57.308546 2022-06-14T07:46:06.234434 2621440 \n", "3 2017-01-11T01:32:27.747955 2017-01-11T01:32:27.556535 13107200 \n", "4 2014-02-18T23:19:05.331656 2017-06-23T00:00:00 41733324 \n", ".. ... ... ... \n", "723 2018-02-02T15:38:27.930264 2018-12-23T01:22:00.608148 \n", "724 2019-06-18T06:37:31.010964 2019-08-27T01:13:09.674578 104448 \n", "725 2016-03-22T10:01:58.539607 2021-11-23 2835528 \n", "726 2014-09-30T00:55:53.313012 \n", "727 2013-05-31T01:01:00.469271 2019-08-28T23:40:58.400220 1495812 \n", "\n", " licence \\\n", "0 Creative Commons Attribution 4.0 \n", "1 Creative Commons Attribution 4.0 \n", "2 Creative Commons Attribution 4.0 \n", "3 Creative Commons Attribution 4.0 \n", "4 Creative Commons Attribution 4.0 \n", ".. ... 
\n", "723 Creative Commons Attribution \n", "724 Creative Commons Attribution 4.0 \n", "725 Creative Commons Attribution 4.0 International \n", "726 Creative Commons Attribution \n", "727 Creative Commons Attribution \n", "\n", " file_index \\\n", "0 state-library-of-queensland-real-estate-maps-f... \n", "1 queensland-state-archives-passport-clearances-... \n", "2 queensland-state-archives-assisted-immigration... \n", "3 queensland-state-archives-australian-south-sea... \n", "4 queensland-museum-queensland-museum-protozoan-... \n", ".. ... \n", "723 state-records-south-australia-sa-foi-number-of... \n", "724 state-library-of-queensland-july-2017-catalogu... \n", "725 libraries-tasmania-world-war-one-tasmanian-pho... \n", "726 nsw-state-archives-deceased-estates-2014-09-30 \n", "727 state-library-of-south-australia-sa-memory-201... \n", "\n", " csv_file \\\n", "0 state-library-of-queensland-real-estate-maps-f... \n", "1 queensland-state-archives-passport-clearances-... \n", "2 queensland-state-archives-assisted-immigration... \n", "3 queensland-state-archives-australian-south-sea... \n", "4 queensland-museum-queensland-museum-protozoan-... \n", ".. ... \n", "723 state-records-south-australia-sa-foi-number-of... \n", "724 state-library-of-queensland-july-2017-catalogu... \n", "725 libraries-tasmania-world-war-one-tasmanian-pho... \n", "726 nsw-state-archives-deceased-estates-2014-09-30... \n", "727 state-library-of-south-australia-sa-memory-201... \n", "\n", " columns \n", "0 Title|Description|Lat|Lon|Link|ID \n", "1 Last name|Given names|Notes|Date of arrival|Ye... \n", "2 Last name|Given names|Notes|Age|Ship|Date|Year... \n", "3 Last name|Given name/s|Page|Date|Ref|Prev sys ... \n", "4 dcterms:type|dcterms:modified|dcterms:language... \n", ".. ... \n", "723 Reason for fee waiver, by sector|No. of waiver... \n", "724 Search strings|Count \n", "725 DIGITAL_OBJECT - URL_TEXT|DIGITAL_OBJECT - URL... \n", "726 Surname|FirstName|Locality|DateOfDeath|DateDut... 
\n", "727 id|TITLE|CREATOR|INNOPAC|LINK|coverage_place|C... \n", "\n", "[728 rows x 21 columns]" ] }, "execution_count": 190, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 109, "id": "right-starter", "metadata": {}, "outputs": [], "source": [ "df.to_csv(f'csvs_for_indexing_{datetime.datetime.now().strftime('%Y%m%d')}.csv', index=False)" ] }, { "cell_type": "markdown", "id": "5c5eddee-bc06-4f45-a96f-3383684d4492", "metadata": {}, "source": [ "## Merge checked files" ] }, { "cell_type": "code", "execution_count": 110, "id": "early-projector", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
publisherinfo_urlcsv_fileindexdropextract
0Australian Institute of Aboriginal and Torres ...https://data.gov.au/dataset/11cbf24a-a31a-488c...australian-institute-of-aboriginal-and-torres-...
1Libraries Tasmaniahttps://data.gov.au/dataset/b0627a17-6783-4c18...libraries-tasmania-bankruptcy-csv-2017-07-14.csvNAME|NAME_SEE_ALSO
2Libraries Tasmaniahttps://data.gov.au/dataset/069a423b-abd8-4454...libraries-tasmania-colonial-secretary-correspo...DESC|NAME|NAME_SEE_ALSO
3Libraries Tasmaniahttps://data.gov.au/dataset/58a9a8d7-01e0-43df...libraries-tasmania-court-csv-2017-07-14.csvNAME
4Libraries Tasmaniahttps://data.gov.au/dataset/d7ec2d93-b9dd-482b...libraries-tasmania-digitised-archives-csv-2016...
\n", "
" ], "text/plain": [ " publisher \\\n", "0 Australian Institute of Aboriginal and Torres ... \n", "1 Libraries Tasmania \n", "2 Libraries Tasmania \n", "3 Libraries Tasmania \n", "4 Libraries Tasmania \n", "\n", " info_url \\\n", "0 https://data.gov.au/dataset/11cbf24a-a31a-488c... \n", "1 https://data.gov.au/dataset/b0627a17-6783-4c18... \n", "2 https://data.gov.au/dataset/069a423b-abd8-4454... \n", "3 https://data.gov.au/dataset/58a9a8d7-01e0-43df... \n", "4 https://data.gov.au/dataset/d7ec2d93-b9dd-482b... \n", "\n", " csv_file index \\\n", "0 australian-institute-of-aboriginal-and-torres-... \n", "1 libraries-tasmania-bankruptcy-csv-2017-07-14.csv NAME|NAME_SEE_ALSO \n", "2 libraries-tasmania-colonial-secretary-correspo... DESC|NAME|NAME_SEE_ALSO \n", "3 libraries-tasmania-court-csv-2017-07-14.csv NAME \n", "4 libraries-tasmania-digitised-archives-csv-2016... \n", "\n", " drop extract \n", "0 \n", "1 \n", "2 \n", "3 \n", "4 " ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Get previously checked details\n", "# Change date to previous checked file\n", "df_checked = pd.read_csv(\n", " \"csvs_for_indexing_checked_20211018.csv\", keep_default_na=False\n", ")[[\"publisher\", \"info_url\", \"csv_file\", \"index\", \"drop\", \"extract\"]]\n", "df_checked.head()" ] }, { "cell_type": "code", "execution_count": 191, "id": "qualified-ground", "metadata": {}, "outputs": [], "source": [ "# Merge checking files\n", "# df_new_check = pd.merge(df, df_checked, how='left', on=['publisher', 'info_url', 'file_title', 'file_modified'])\n", "# This should merge latest harvested data with the indexing info from the checked file\n", "df_new_check = pd.merge(\n", " df, df_checked, how=\"left\", on=[\"publisher\", \"info_url\", \"csv_file\"]\n", ")" ] }, { "cell_type": "code", "execution_count": 192, "id": "b8c5c3df-d5ff-47f4-9978-549996eb87a4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
publisherinfo_urlfile_title_xfile_modified_xcsv_filefile_title_yfile_modified_ycolumnsindexdropextract
0State Library of Queenslandhttps://data.qld.gov.au/dataset/959d611f-a9cf-...Real Estate Maps February 20182019-08-19T06:18:57.312772state-library-of-queensland-real-estate-maps-f...Real Estate Maps February 20182019-08-19T06:18:57.313Title|Description|Lat|Lon|Link|ID
1Queensland State Archiveshttps://data.qld.gov.au/dataset/fc87f25a-dc02-...Passport clearances 1923 to 19402022-01-10T04:53:27.827980queensland-state-archives-passport-clearances-...Passport clearances 1923 to 19402022-01-10T04:53:27.828Last name|Given names|Notes|Date of arrival|Ye...Last name|Given namesDescription
2Queensland State Archiveshttps://data.qld.gov.au/dataset/ba182873-e8a7-...Assisted immigration 1848 to 1912 - A2022-06-14T07:46:06.234434queensland-state-archives-assisted-immigration...Assisted immigration 1848 to 1912 - A2022-06-14T07:46:06.234Last name|Given names|Notes|Age|Ship|Date|Year...
3Queensland State Archiveshttps://data.qld.gov.au/dataset/eae0afa9-681c-...Australian South Sea Islanders 1867 to 1908 L-Z2017-01-11T01:32:27.556535queensland-state-archives-australian-south-sea...Australian South Sea Islanders 1867 to 1908 L-Z2017-01-11T01:32:27.557Last name|Given name/s|Page|Date|Ref|Prev sys ...
4Queensland Museumhttps://data.qld.gov.au/dataset/4f1071f2-f4fa-...Queensland Museum protozoan collection records2017-06-23T00:00:00queensland-museum-queensland-museum-protozoan-...Queensland Museum protozoan collection records2017-06-23T00:00:00dcterms:type|dcterms:modified|dcterms:language...
....................................
723State Records South Australiahttps://data.sa.gov.au/data/dataset/f923f9b0-b...SA FOI – number of fee waiver or reduction by ...2018-12-23T01:22:00.608148state-records-south-australia-sa-foi-number-of...SA FOI – number of fee waiver or reduction by ...2018-12-23T01:22:00.608Reason for fee waiver, by sector|No. of waiver...
724State Library of Queenslandhttps://data.qld.gov.au/dataset/cebb997c-1c42-...July 2017 Catalogue searches2019-08-27T01:13:09.674578state-library-of-queensland-july-2017-catalogu...July 2017 Catalogue searches2019-08-27T01:13:09.675Search strings|Count
725Libraries Tasmaniahttps://data.gov.au/dataset/b711231a-2a02-48eb...World War One Tasmanian Photographs - CSV2021-11-23libraries-tasmania-world-war-one-tasmanian-pho...World War One Tasmanian Photographs - CSV2021-11-23DIGITAL_OBJECT - URL_TEXT|DIGITAL_OBJECT - URL...NAME
726NSW State Archiveshttps://data.nsw.gov.au/data/dataset/5d45437c-...Deceased Estatesnsw-state-archives-deceased-estates-2014-09-30...Deceased EstatesSurname|FirstName|Locality|DateOfDeath|DateDut...
727State Library of South Australiahttps://data.sa.gov.au/data/dataset/7cd90f98-1...SA Memory2019-08-28T23:40:58.400220state-library-of-south-australia-sa-memory-201...SA Memory2019-08-28T23:40:58.400id|TITLE|CREATOR|INNOPAC|LINK|coverage_place|C...
\n", "

728 rows × 11 columns

\n", "
" ], "text/plain": [ " publisher \\\n", "0 State Library of Queensland \n", "1 Queensland State Archives \n", "2 Queensland State Archives \n", "3 Queensland State Archives \n", "4 Queensland Museum \n", ".. ... \n", "723 State Records South Australia \n", "724 State Library of Queensland \n", "725 Libraries Tasmania \n", "726 NSW State Archives \n", "727 State Library of South Australia \n", "\n", " info_url \\\n", "0 https://data.qld.gov.au/dataset/959d611f-a9cf-... \n", "1 https://data.qld.gov.au/dataset/fc87f25a-dc02-... \n", "2 https://data.qld.gov.au/dataset/ba182873-e8a7-... \n", "3 https://data.qld.gov.au/dataset/eae0afa9-681c-... \n", "4 https://data.qld.gov.au/dataset/4f1071f2-f4fa-... \n", ".. ... \n", "723 https://data.sa.gov.au/data/dataset/f923f9b0-b... \n", "724 https://data.qld.gov.au/dataset/cebb997c-1c42-... \n", "725 https://data.gov.au/dataset/b711231a-2a02-48eb... \n", "726 https://data.nsw.gov.au/data/dataset/5d45437c-... \n", "727 https://data.sa.gov.au/data/dataset/7cd90f98-1... \n", "\n", " file_title_x \\\n", "0 Real Estate Maps February 2018 \n", "1 Passport clearances 1923 to 1940 \n", "2 Assisted immigration 1848 to 1912 - A \n", "3 Australian South Sea Islanders 1867 to 1908 L-Z \n", "4 Queensland Museum protozoan collection records \n", ".. ... \n", "723 SA FOI – number of fee waiver or reduction by ... \n", "724 July 2017 Catalogue searches \n", "725 World War One Tasmanian Photographs - CSV \n", "726 Deceased Estates \n", "727 SA Memory \n", "\n", " file_modified_x \\\n", "0 2019-08-19T06:18:57.312772 \n", "1 2022-01-10T04:53:27.827980 \n", "2 2022-06-14T07:46:06.234434 \n", "3 2017-01-11T01:32:27.556535 \n", "4 2017-06-23T00:00:00 \n", ".. ... \n", "723 2018-12-23T01:22:00.608148 \n", "724 2019-08-27T01:13:09.674578 \n", "725 2021-11-23 \n", "726 \n", "727 2019-08-28T23:40:58.400220 \n", "\n", " csv_file \\\n", "0 state-library-of-queensland-real-estate-maps-f... \n", "1 queensland-state-archives-passport-clearances-... 
\n", "2 queensland-state-archives-assisted-immigration... \n", "3 queensland-state-archives-australian-south-sea... \n", "4 queensland-museum-queensland-museum-protozoan-... \n", ".. ... \n", "723 state-records-south-australia-sa-foi-number-of... \n", "724 state-library-of-queensland-july-2017-catalogu... \n", "725 libraries-tasmania-world-war-one-tasmanian-pho... \n", "726 nsw-state-archives-deceased-estates-2014-09-30... \n", "727 state-library-of-south-australia-sa-memory-201... \n", "\n", " file_title_y \\\n", "0 Real Estate Maps February 2018 \n", "1 Passport clearances 1923 to 1940 \n", "2 Assisted immigration 1848 to 1912 - A \n", "3 Australian South Sea Islanders 1867 to 1908 L-Z \n", "4 Queensland Museum protozoan collection records \n", ".. ... \n", "723 SA FOI – number of fee waiver or reduction by ... \n", "724 July 2017 Catalogue searches \n", "725 World War One Tasmanian Photographs - CSV \n", "726 Deceased Estates \n", "727 SA Memory \n", "\n", " file_modified_y \\\n", "0 2019-08-19T06:18:57.313 \n", "1 2022-01-10T04:53:27.828 \n", "2 2022-06-14T07:46:06.234 \n", "3 2017-01-11T01:32:27.557 \n", "4 2017-06-23T00:00:00 \n", ".. ... \n", "723 2018-12-23T01:22:00.608 \n", "724 2019-08-27T01:13:09.675 \n", "725 2021-11-23 \n", "726 \n", "727 2019-08-28T23:40:58.400 \n", "\n", " columns index \\\n", "0 Title|Description|Lat|Lon|Link|ID \n", "1 Last name|Given names|Notes|Date of arrival|Ye... Last name|Given names \n", "2 Last name|Given names|Notes|Age|Ship|Date|Year... \n", "3 Last name|Given name/s|Page|Date|Ref|Prev sys ... \n", "4 dcterms:type|dcterms:modified|dcterms:language... \n", ".. ... ... \n", "723 Reason for fee waiver, by sector|No. of waiver... \n", "724 Search strings|Count \n", "725 DIGITAL_OBJECT - URL_TEXT|DIGITAL_OBJECT - URL... NAME \n", "726 Surname|FirstName|Locality|DateOfDeath|DateDut... \n", "727 id|TITLE|CREATOR|INNOPAC|LINK|coverage_place|C... 
\n", "\n", " drop extract \n", "0 \n", "1 Description \n", "2 \n", "3 \n", "4 \n", ".. ... ... \n", "723 \n", "724 \n", "725 \n", "726 \n", "727 \n", "\n", "[728 rows x 11 columns]" ] }, "execution_count": 192, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_new_check" ] }, { "cell_type": "markdown", "id": "59a07295-af5b-44b2-b4ae-4de3da28db20", "metadata": {}, "source": [ "Manually check the file below to see if all historical files with names have index values. Check against list of new file titles." ] }, { "cell_type": "code", "execution_count": 113, "id": "outdoor-south", "metadata": {}, "outputs": [], "source": [
 "# Imported here because the notebook's import cell does not bring in datetime\n",
 "import datetime\n",
 "\n",
 "# Save new checking file, stamped with today's date (YYYYMMDD).\n",
 "# The f-string uses double quotes outside so the single-quoted format string inside it\n",
 "# parses on Python versions before 3.12.\n",
 "df_new_check.to_csv(\n",
 "    f\"csvs_for_indexing_checked_{datetime.datetime.now().strftime('%Y%m%d')}.csv\",\n",
 "    index=False,\n",
 ")"
 ] }, { "cell_type": "markdown", "id": "b9191f90-c439-4114-8ce8-b21da4b8f5e3", "metadata": {}, "source": [ "## PROV datasets are zipped" ] }, { "cell_type": "code", "execution_count": 94, "id": "forced-death", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Victorian World War One Soldier Settlers\n", "British Assisted Passengers to Victoria 1839-1871\n", "public-records-office-victoria-british-assisted-passengers-to-victoria-1839-1871-2014-08-01.csv\n", "Unassisted Inward Passengers 1852-1923\n", "public-records-office-victoria-unassisted-inward-passengers-1852-1923-2020-10-27.csv\n", "Outwards Passengers from Victoria 1852-1915\n", "public-records-office-victoria-outwards-passengers-from-victoria-1852-1915-2014-08-01.csv\n" ] } ], "source": [
 "# Download the extra PROV indexes: zipped downloads are unpacked into prov_csvs/,\n",
 "# anything else is saved there as a CSV named publisher-title-date.\n",
 "prov_csvs = pd.read_csv(\"extra-prov-indexes.csv\")\n",
 "prov_csvs[\"file_index\"] = prov_csvs.apply(\n",
 "    lambda x: f'{slugify(x[\"publisher\"])}-{slugify(x[\"file_title\"])}-{slugify(x[\"file_created\"][:10])}',\n",
 "    axis=1,\n",
 ")\n",
 "# Plain (non-zipped) downloads are written straight into this directory, so it has to exist\n",
 "Path(\"prov_csvs\").mkdir(exist_ok=True)\n",
 "for csv in prov_csvs.itertuples():\n",
 "    print(csv.dataset_title)\n",
 "    response = s.get(csv.download_url)\n",
 "    # raise_for_status is a method: without the call, HTTP error responses were never detected\n",
 "    response.raise_for_status()\n",
 "    file_name = f\"{slugify(csv.publisher)}-{slugify(csv.file_title)}-{csv.file_created[:10]}.csv\"\n",
 "    if csv.format == \"ZIP\":\n",
 "        print(file_name)\n",
 "        try:\n",
 "            # Unpack every archive member into prov_csvs/\n",
 "            with zipfile.ZipFile(io.BytesIO(response.content)) as z:\n",
 "                z.extractall(\"prov_csvs\")\n",
 "        except zipfile.BadZipFile:\n",
 "            # Report the broken archive instead of skipping it silently\n",
 "            print(f\"Bad zip file -- {csv.download_url}\")\n",
 "    else:\n",
 "        with Path(\"prov_csvs\", file_name).open(\"w\") as csv_file:\n",
 "            csv_file.write(response.text)"
 ] }, { "cell_type": "code", "execution_count": 95, "id": "premier-trustee", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "prov_csvs/public-records-office-victoria-british-assisted-passengers-to-victoria-1839-1871-2014-08-01.csv\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/tim/.pyenv/versions/3.8.5/envs/ozglam-data/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3155: DtypeWarning: Columns (4) have mixed types.Specify dtype option on import or set low_memory=False.\n", " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "prov_csvs/public-records-office-victoria-outwards-passengers-from-victoria-1852-1915-2014-08-01.csv\n", "prov_csvs/public-records-office-victoria-victorian-world-war-one-soldier-settlers-2015-05-29.csv\n", "prov_csvs/public-records-office-victoria-unassisted-inward-passengers-1852-1923-2020-10-27.csv\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/Users/tim/.pyenv/versions/3.8.5/envs/ozglam-data/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3155: DtypeWarning: Columns (6,9,11,12) have mixed types.Specify dtype option on import or set low_memory=False.\n", " has_raised = await self.run_ast_nodes(code_ast.body, cell_name,\n" ] } ], "source": [ "prov_df = pd.DataFrame()\n", "for csv in Path(\"prov_csvs\").glob(\"*.csv\"):\n", " print(csv)\n", " file_index = csv.name.split(\".\")[0]\n", " details = 
(\n", " prov_csvs.loc[prov_csvs[\"file_index\"] == file_index][\n", " [\"publisher\", \"info_url\", \"file_title\", \"file_modified\"]\n", " ]\n", " .iloc[0]\n", " .to_dict()\n", " )\n", " details[\"csv_file\"] = csv.name\n", " df_csv = pd.read_csv(csv, low_memory=False)\n", " details[\"columns\"] = \"|\".join(list(df_csv.columns))\n",
 " # DataFrame.append was removed in pandas 2.0; concatenate a one-row frame instead\n",
 " prov_df = pd.concat([prov_df, pd.DataFrame([details])], ignore_index=True)"
 ] }, { "cell_type": "code", "execution_count": 178, "id": "built-tackle", "metadata": {}, "outputs": [], "source": [ "prov_df.to_csv(\"prov_csvs_for_indexing.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": null, "id": "disabled-channel", "metadata": {}, "outputs": [], "source": [ "{\n", " \"databases\": {\n", " \"database1\": {\n", " \"source\": \"Alternative source\",\n", " \"source_url\": \"http://example.com/\",\n", " \"tables\": {\n", " \"example_table\": {\n", " \"description_html\": \"Custom table description\",\n", " \"license\": \"CC BY 3.0 US\",\n", " \"license_url\": \"https://creativecommons.org/licenses/by/3.0/us/\",\n", " }\n", " },\n", " }\n", " }\n", "}" ] }, { "cell_type": "code", "execution_count": 212, "id": "silent-marathon", "metadata": {}, "outputs": [], "source": [
 "# Combine the manually checked lists; from the portal CSV list keep only rows that were\n",
 "# given a value in the `index` column\n",
 "df_checked = pd.read_csv(\n", " \"csvs_for_indexing_checked_20220809.csv\", keep_default_na=False\n", ")\n", "df_checked_filtered = df_checked.loc[df_checked[\"index\"] != \"\"]\n", "df_prov_checked = pd.read_csv(\n", " \"prov_csvs_for_indexing_checked.csv\", keep_default_na=False\n", ")\n", "df_sa_checked = pd.read_csv(\"sa_datasets_for_checking.csv\", keep_default_na=False)\n", "df_all_checked = pd.concat([df_checked_filtered, df_prov_checked, df_sa_checked])\n", "df_all = pd.read_csv(\"glam-datasets-from-gov-portals.csv\", keep_default_na=False)\n",
 "# Rebuild the publisher-title-date file name for every harvested file so it can be used\n",
 "# as a merge key against the checked lists\n",
 "df_all[\"csv_file\"] = df_all.apply(\n", " lambda x: f'{slugify(x[\"publisher\"])}-{slugify(x[\"file_title\"])}-{slugify(x[\"file_created\"][:10])}.csv',\n", " axis=1,\n", ")\n", "\n", "# df_final = pd.merge(df_all_checked, df_all, how='left', on=['file_title', 'info_url', 'publisher', 'file_modified'])\n",
 "# Left merge: every checked row is kept and gains the harvested details of its matching file\n",
 "df_final = pd.merge(\n", " df_all_checked, df_all, how=\"left\", on=[\"info_url\", \"publisher\", \"csv_file\"]\n", ")" ] }, { "cell_type": "code", "execution_count": 213, "id": "angry-hindu", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(194, 25)" ] }, "execution_count": 213, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_final.shape" ] }, { "cell_type": "code", "execution_count": 214, "id": "d0f48dc0-4b8d-4a2e-ba70-543444338d33", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['history-trust-of-south-australia-executive-employment-at-the-history-trust-of-south-australia-2011-2020-2018-09-03.csv',\n", " 'history-trust-of-south-australia-consultants-engaged-by-the-history-trust-of-south-australia-2019-11-08.csv',\n", " 'history-trust-of-south-australia-public-complaints-received-by-history-trust-of-south-australia-2018-09-03.csv',\n", " 'history-trust-of-south-australia-fraud-detection-at-history-trust-of-south-australia-2011-2021-2018-09-03.csv',\n", " 'history-trust-of-south-australia-whistleblowers-disclosure-for-history-trust-of-south-australia-2011-2021-2018-09-03.csv',\n", " 'history-trust-of-south-australia-contractors-engaged-by-the-history-trust-of-south-australia-2019-11-08.csv',\n", " 'history-trust-of-south-australia-passengers-in-history-search-index-2016-06-28.csv',\n", " 'history-trust-of-south-australia-workplace-injury-claims-received-by-history-trust-of-south-australia-2019-20-2018-09-03.csv',\n", " 'history-trust-of-south-australia-suffrage125-petition-2019-04-04.csv',\n", " 'history-trust-of-south-australia-bond-studios-glass-negative-collection-2017-06-27.csv',\n", " 'history-trust-of-south-australia-south-australian-government-photographic-collection-api-2017-06-23.csv',\n", " 'history-trust-of-south-australia-sa-government-photographic-collection-2017-07-28.csv',\n", " 
'history-trust-of-south-australia-state-history-collection-2017-06-27.csv',\n", " 'history-trust-of-south-australia-community-history-live-stream-2015-05-15.csv',\n", " 'history-trust-of-south-australia-historical-places-2015-07-02.csv',\n", " 'history-trust-of-south-australia-historical-things-2015-07-02.csv',\n", " 'history-trust-of-south-australia-historical-organisations-2015-07-02.csv',\n", " 'history-trust-of-south-australia-historical-events-2015-07-02.csv',\n", " 'history-trust-of-south-australia-internee-data-2015-05-29.csv',\n", " 'history-trust-of-south-australia-history-festival-events-2015-05-14.csv']" ] }, "execution_count": 214, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# List the generated CSV file names for a single publisher\n",
 "df_all.loc[\n",
 "    df_all[\"publisher\"] == \"History Trust of South Australia\", \"csv_file\"\n",
 "].to_list()"
 ] }, { "cell_type": "code", "execution_count": 216, "id": "cross-adolescent", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "history-trust-of-south-australia-passengers-in-history-search-index-2016-06-28.csv\n", "history-trust-of-south-australia-suffrage125-petition-2019-04-04.csv\n", "libraries-tasmania-bankruptcy-csv-2017-07-14.csv\n", "libraries-tasmania-colonial-secretary-correspondence-csv-2019-09-05.csv\n", "libraries-tasmania-court-csv-2017-07-14.csv\n", "libraries-tasmania-education-csv-2019-09-05.csv\n", "libraries-tasmania-eheritage-data-csv-2017-07-17.csv\n", "libraries-tasmania-employment-csv-2019-09-05.csv\n", "libraries-tasmania-hotels-properties-csv-2016-03-22.csv\n", "libraries-tasmania-land-records-csv-2021-03-23.csv\n", "libraries-tasmania-miscellaneous-csv-2019-09-05.csv\n", "libraries-tasmania-tasmanian-arrivals-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-births-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-census-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-convicts-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-convicts-permission-to-marry-csv-2016-03-22.csv\n", 
"libraries-tasmania-tasmanian-deaths-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-departures-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-divorces-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-health-welfare-records-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-immigration-csv-2017-05-09.csv\n", "libraries-tasmania-tasmanian-inquests-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-marriages-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-naturalisations-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-prisoners-csv-2016-03-22.csv\n", "libraries-tasmania-tasmanian-wills-csv-2016-03-22.csv\n", "libraries-tasmania-world-war-one-tasmanian-photographs-csv-2016-03-22.csv\n", "nsw-state-archives-index-to-certificates-of-freedom-1823-69-2015-10-01.csv\n", "nsw-state-archives-index-to-convict-bank-accounts-1837-70-2015-10-01.csv\n", "nsw-state-archives-index-to-convict-pardons-1791-1825-and-1837-47-2015-10-01.csv\n", "nsw-state-archives-index-to-tickets-of-exemption-from-government-labour-1827-32-2015-10-01.csv\n", "nsw-state-archives-index-to-tickets-of-leave-passports-1835-69-2015-10-01.csv\n", "nsw-state-archives-index-to-tickets-of-leave-1810-75-2015-10-01.csv\n", "nsw-state-archives-index-to-tickets-of-leave-certificates-of-emancipation-and-pardons-1810-19-2015-10-01.csv\n", "public-records-office-victoria-index-to-wills-probate-and-administration-records-1841-2009-2014-12-22.csv\n", "public-records-office-victoria-victorian-world-war-one-soldier-settlers-2015-05-29.csv\n", "public-records-office-victoria-vprs-515-p1-central-register-of-male-prisoners-2014-12-22.csv\n", "public-records-office-victoria-british-assisted-passengers-to-victoria-1839-1871-2014-08-01.csv\n", "public-records-office-victoria-unassisted-inward-passengers-1852-1923-2020-10-27.csv\n", "public-records-office-victoria-outwards-passengers-from-victoria-1852-1915-2014-08-01.csv\n", 
"queensland-state-archives-aboriginal-war-census-1915-to-1916-2015-07-08.csv\n", "queensland-state-archives-army-reservist-payments-1909-to-1920-2016-08-16.csv\n", "queensland-state-archives-assisted-immigration-1848-to-1912-combined-2018-08-27.csv\n", "queensland-state-archives-australian-south-sea-islanders-1867-1948-2021-02-23.csv\n", "queensland-state-archives-beaudesert-shire-and-logan-village-burials-1878-2000-2020-06-24.csv\n", "queensland-state-archives-brisbane-hospital-admissions-1872-to-1887-2014-09-29.csv\n", "queensland-state-archives-brisbane-hospital-registers-of-deaths-1899-to-1913-2014-09-29.csv\n", "queensland-state-archives-brisbane-hospital-registers-of-deaths-1933-to-1963-2014-09-29.csv\n", "queensland-state-archives-chronological-register-of-convicts-1824-1839-2018-03-02.csv\n", "queensland-state-archives-civil-servants-1866-1867-2014-06-24.csv\n", "queensland-state-archives-coloured-labour-and-asiatic-aliens-in-queensland-1913-2014-06-24.csv\n", "queensland-state-archives-companies-1863-to-1959-2014-07-07.csv\n", "queensland-state-archives-consumptive-patients-1897-to-1903-2014-09-29.csv\n", "queensland-state-archives-criminal-depositions-1861-1885-2018-12-18.csv\n", "queensland-state-archives-dental-board-1910-to-1932-2014-06-25.csv\n", "queensland-state-archives-dental-board-records-1900-1932-2022-04-12.csv\n", "queensland-state-archives-dentist-apprentices-1903-to-1925-2014-06-25.csv\n", "queensland-state-archives-dentists-1903-to-1932-2014-06-25.csv\n", "queensland-state-archives-discharged-soldier-settler-loans-1917-1919-2017-07-21.csv\n", "queensland-state-archives-divorces-1861-1894-2013-03-04.csv\n", "queensland-state-archives-equity-files-1857-to-1899-2013-03-03.csv\n", "queensland-state-archives-farm-lads-1922-1940-2018-01-19.csv\n", "queensland-state-archives-female-prisoners-admitted-toowoomba-1887-1891-2015-07-02.csv\n", "queensland-state-archives-immigrants-landed-bowen-1888-1896-2022-05-06.csv\n", 
"queensland-state-archives-immigrants-nominated-for-passage-maryborough-1884-to-1907-2013-06-25.csv\n", "queensland-state-archives-immigrants-bowen-immigration-depot-1885-1892-2021-07-22.csv\n", "queensland-state-archives-immigration-1922-to-1940-2013-06-25.csv\n", "queensland-state-archives-imperial-pensions-1872-to-1915-2014-06-23.csv\n", "queensland-state-archives-imperial-pensions-1898-to-1912-2014-06-23.csv\n", "queensland-state-archives-index-to-pensions-1908-1909-2020-06-28.csv\n", "queensland-state-archives-index-to-boer-war-records-1899-1902-2018-12-14.csv\n", "queensland-state-archives-index-to-brisbane-gaol-hospital-admission-registers-1889-1911-2020-06-24.csv\n", "queensland-state-archives-index-to-colonial-secretary-s-correspondence-1859-1861-csv-2018-12-17.csv\n", "queensland-state-archives-index-to-dunwich-benevolent-asylum-1885-1907-2021-01-19.csv\n", "queensland-state-archives-index-to-immigrants-and-crew-1860-1964-2020-06-24.csv\n", "queensland-state-archives-index-to-immigration-1909-1932-2020-06-30.csv\n", "queensland-state-archives-index-to-mariner-s-certificates-1877-1939-2022-04-27.csv\n", "queensland-state-archives-index-to-outdoor-relief-1892-1920-2020-06-29.csv\n", "queensland-state-archives-index-to-register-of-cases-and-treatment-at-moreton-bay-hospital-1830-1862-2020-06-28.csv\n", "queensland-state-archives-index-to-registers-of-agricultural-lessees-1885-1908-2020-06-29.csv\n", "queensland-state-archives-index-to-registers-of-immigrants-rockhampton-1882-1915-2020-06-29.csv\n", "queensland-state-archives-index-to-wallangarra-flu-camp-1918-1919-2021-07-26.csv\n", "queensland-state-archives-indigence-cases-1899-to-1948-2016-08-16.csv\n", "queensland-state-archives-inquests-1859-to-1902-2013-03-03.csv\n", "queensland-state-archives-instruments-of-renunciation-1915-to-1983-2013-03-04.csv\n", "queensland-state-archives-justices-of-the-peace-1857-to-1957-2013-03-04.csv\n", "queensland-state-archives-land-orders-1861-to-1874-2013-06-25.csv\n", 
"queensland-state-archives-land-orders-1862-to-1878-2016-12-08.csv\n", "queensland-state-archives-land-orders-1865-to-1866-2016-02-22.csv\n", "queensland-state-archives-land-selections-1885-1981-2021-07-23.csv\n", "queensland-state-archives-lazaret-patient-registers-2021-08-02.csv\n", "queensland-state-archives-leases-selections-and-pastoral-runs-and-other-related-records-1850-2014-2020-06-29.csv\n", "queensland-state-archives-mackay-hospital-admissions-1891-to-1908-2014-09-29.csv\n", "queensland-state-archives-military-service-south-african-boer-war-2022-04-22.csv\n", "queensland-state-archives-mineral-leases-1871-to-1940-2013-10-14.csv\n", "queensland-state-archives-miners-rights-1874-to-1880-combined-2018-09-05.csv\n", "queensland-state-archives-miners-rights-and-warden-s-collections-palmer-goldfields-1874-1880-2018-04-26.csv\n", "queensland-state-archives-monthly-and-half-yearly-returns-for-moreton-bay-1829-to-1837-2016-09-08.csv\n", "queensland-state-archives-naturalisations-1851-to-1908-2013-03-03.csv\n", "queensland-state-archives-naturalisations-1880-1885-2021-01-19.csv\n", "queensland-state-archives-nominated-immigrants-1908-to-1922-2013-06-25.csv\n", "queensland-state-archives-nurses-examinations-1915-to-1925-2014-06-24.csv\n", "queensland-state-archives-oronsay-immigration-1925-to-1972-2015-05-28.csv\n", "queensland-state-archives-passage-certificates-1887-to-1906-2013-06-25.csv\n", "queensland-state-archives-passport-clearances-1923-to-1940-2017-01-11.csv\n", "queensland-state-archives-perpetual-lease-selections-of-soldier-settlements-1917-1929-2018-11-13.csv\n", "queensland-state-archives-photographic-records-of-prisoners-1875-1913-2021-01-19.csv\n", "queensland-state-archives-prisoners-admitted-toowoomba-1895-1906-2015-07-02.csv\n", "queensland-state-archives-prisoners-discharged-toowoomba-1869-1879-2017-01-11.csv\n", "queensland-state-archives-prisoners-tried-toowoomba-1864-1903-2015-07-01.csv\n", 
"queensland-state-archives-rations-issued-to-immigrants-maryborough-1875-1884-2013-06-25.csv\n", "queensland-state-archives-redeemed-land-orders-1860-1907-2021-07-21.csv\n", "queensland-state-archives-reformatory-school-for-boys-1871-to-1906-2014-11-03.csv\n", "queensland-state-archives-register-of-court-fees-marburg-1885-to-1908-2016-09-08.csv\n", "queensland-state-archives-register-of-immigrants-1864-to-1878-2013-10-15.csv\n", "queensland-state-archives-register-of-immigrants-brisbane-1885-to-1917-2013-10-15.csv\n", "queensland-state-archives-register-of-immigrants-toowoomba-1880-to-1888-2017-01-12.csv\n", "queensland-state-archives-register-of-land-sold-1842-to-1859-2014-01-05.csv\n", "queensland-state-archives-register-of-lands-1861-to-1868-2014-01-05.csv\n", "queensland-state-archives-register-of-lands-sold-1842-1868-2022-05-03.csv\n", "queensland-state-archives-register-of-lands-sold-1849-to-1861-2014-03-20.csv\n", "queensland-state-archives-register-of-the-engagement-of-immigrants-at-the-immigration-depot-bowen-1873-1912-2020-06-29.csv\n", "queensland-state-archives-registers-of-applications-by-selectors-1868-1885-2020-06-23.csv\n", "queensland-state-archives-registers-of-immigrants-1882-to-1938-combined-2018-08-28.csv\n", "queensland-state-archives-registers-of-immigrants-promissory-notes-maryborough-1874-1903-2021-07-23.csv\n", "queensland-state-archives-scholarships-in-the-education-office-gazette-1900-1940-2020-06-24.csv\n", "queensland-state-archives-seamen-1882-to-1919-2014-06-23.csv\n", "queensland-state-archives-soldier-settlement-ledgers-1917-to-1929-2016-11-02.csv\n", "queensland-state-archives-south-sea-islanders-1867-to-1908-combined-2017-07-31.csv\n", "queensland-state-archives-st-helena-prisoners-1863-to-1936-2012-12-10.csv\n", "queensland-state-archives-sugar-exemptions-1922-1923-2018-03-06.csv\n", "queensland-state-archives-tb-home-applications-1923-to-1932-2014-06-24.csv\n", "queensland-state-archives-teachers-1860-to-1905-2015-07-01.csv\n", 
"queensland-state-archives-teachers-in-the-education-office-gazettes-1899-1925-2020-06-29.csv\n", "queensland-state-archives-toowoomba-girls-industrial-school-admissions-and-discharges-1881-to-1903-2014-11-03.csv\n", "queensland-state-archives-toowoomba-girls-reformatory-discharges-1882-to-1903-2014-11-03.csv\n", "queensland-state-archives-toowoomba-prisoners-1864-1906-2022-04-28.csv\n", "queensland-state-archives-transfer-of-runs-1848-to-1874-2015-06-01.csv\n", "queensland-state-archives-trustees-files-1889-to-1929-2013-03-04.csv\n", "queensland-state-archives-wills-1857-to-1940-2017-01-11.csv\n", "queensland-state-archives-windsor-town-council-honour-roll-1914-to-1925-2015-11-24.csv\n", "queensland-state-archives-writs-1857-2008-2018-01-19.csv\n", "state-library-of-queensland-british-convict-registers-2013-05-29.csv\n", "state-library-of-queensland-licensed-victuallers-index-updated-july-2022-2022-08-01.csv\n", "state-library-of-queensland-persons-called-before-queensland-government-committees-1860-1920-2019-07-24.csv\n", "state-library-of-queensland-police-gazette-inquests-1875-1885-updated-dec-2021-2021-12-02.csv\n", "state-library-of-queensland-portraits-of-soldiers-from-the-south-african-war-1899-1902-2014-09-26.csv\n", "state-library-of-queensland-portraits-of-ww1-soldiers-australasian-traveller-2018-08-20.csv\n", "state-library-of-queensland-queensland-mining-accidents-1882-1945-2014-11-23.csv\n", "state-library-of-queensland-queensland-railway-appointees-1890-1915-2018-05-18.csv\n", "state-library-of-queensland-queensland-railway-removals-1890-1915-2018-05-18.csv\n", "state-library-of-queensland-southern-and-western-railway-appointees-1866-1876-2019-07-23.csv\n", "state-library-of-queensland-southern-and-western-railway-removals-1866-1876-2019-07-23.csv\n", "state-library-of-queensland-world-war-1-soldier-portraits-2015-07-02.csv\n", "state-library-of-south-australia-heroes-of-the-great-war-chronicle-1915-1919-2015-07-02.csv\n", 
"state-library-of-south-australia-heroes-of-the-great-war-chronicle-portraits-1915-1919-2015-07-02.csv\n", "state-library-of-south-australia-old-colonists-men-2019-06-17.csv\n", "state-library-of-south-australia-old-colonists-women-2019-06-17.csv\n", "state-library-of-south-australia-oral-histories-2017-07-07.csv\n", "state-library-of-south-australia-s-a-speaks-an-oral-history-of-life-in-south-australia-before-1930-2017-07-06.csv\n", "state-library-of-south-australia-south-australian-photographs-world-war-1-1914-1929-2018-06-18.csv\n", "state-library-of-south-australia-south-australian-photographs-1900-1919-2019-08-28.csv\n", "state-library-of-south-australia-south-australian-photographs-1920-1949-2019-08-28.csv\n", "state-library-of-south-australia-south-australian-photographs-1950-onwards-2019-08-28.csv\n", "state-library-of-south-australia-south-australian-photographs-pre-1900-2019-08-28.csv\n", "state-library-of-victoria-melbourne-and-metropolitan-hotels-pubs-and-publicans-2017-03-29.csv\n", "state-library-of-western-australia-adopt-a-soldier-photographs-csv-2016-07-25.csv\n", "state-library-of-western-australia-eastern-goldfields-2016-07-28.csv\n", "state-library-of-western-australia-in-memoriam-cards-csv-2016-07-25.csv\n", "state-library-of-western-australia-index-entries-beginning-with-a-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-b-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-c-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-d-and-e-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-f-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-g-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-h-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-i-and-j-2016-07-28.csv\n", 
"state-library-of-western-australia-index-entries-beginning-with-k-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-l-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-m-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-n-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-o-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-p-and-q-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-r-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-s-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-t-2016-07-28.csv\n", "state-library-of-western-australia-index-entries-beginning-with-u-z-2016-07-28.csv\n", "state-library-of-western-australia-indexed-obituaries-csv-2018-09-04.csv\n", "state-library-of-western-australia-krantz-sheldon-csv-2016-07-25.csv\n", "state-library-of-western-australia-pictorial-collection-csv-2016-07-25.csv\n", "state-library-of-western-australia-slwa-centenary-wwi-2016-07-25.csv\n", "state-library-of-western-australia-wabi-police-subset-2016-07-28.csv\n", "state-library-of-western-australia-york-and-districts-subset-2016-07-28.csv\n", "state-records-office-of-western-australia-index-to-group-settlements-in-wa-2017-06-01.csv\n" ] } ], "source": [ "metadata = {\n", " \"title\": \"GLAM Name Indexes\",\n", " \"description_html\": \"\"\"\n", "

Search for names across an aggregated collection of name indexes from Australian GLAM organisations.

\n", "

For more information about the datasets, see the GLAM data portals section of the GLAM Workbench.

\n", "\"\"\",\n", " \"databases\": {},\n", "}\n", "\n",
 "# Build one SQLite database per publisher with one table per CSV file, recording each\n",
 "# table's title, description, source and licence details in `metadata` along the way\n",
 "for org, csvs in df_final.groupby(by=\"publisher\"):\n",
 "    org_slug = slugify(org)\n",
 "    metadata[\"databases\"][org_slug] = {\"title\": org, \"tables\": {}}\n",
 "    # Keep the raw connection so it can be closed once this publisher's tables are written\n",
 "    conn = sqlite3.connect(f\"{org_slug}.db\")\n",
 "    db = Database(conn)\n",
 "    try:\n",
 "        for csv in csvs.itertuples():\n",
 "            print(csv.csv_file)\n",
 "            # Prefix the file title with the dataset title when the two differ\n",
 "            if csv.dataset_title != csv.file_title_y:\n",
 "                title = f\"{csv.dataset_title} – {csv.file_title_y}\"\n",
 "            else:\n",
 "                title = csv.file_title_y\n",
 "            # Render the descriptions from Markdown, using a single copy when both are identical\n",
 "            if csv.dataset_description != csv.file_description:\n",
 "                description = f\"{markdown2.markdown(str(csv.dataset_description))}{markdown2.markdown(str(csv.file_description))}\"\n",
 "            else:\n",
 "                description = markdown2.markdown(str(csv.dataset_description))\n",
 "            if csv.file_modified_y:\n",
 "                description += f\"

Last modified: {csv.file_modified_y}

\"\n",
 "            table_data = {\n",
 "                \"title\": title,\n",
 "                \"description_html\": description,\n",
 "                \"source_url\": csv.download_url,\n",
 "                \"about_url\": csv.info_url,\n",
 "                \"license\": csv.licence,\n",
 "                \"searchmode\": \"raw\",\n",
 "            }\n",
 "            table_slug = slugify(csv.file_title_y)\n",
 "            metadata[\"databases\"][org_slug][\"tables\"][table_slug] = table_data\n",
 "            table = db[table_slug]\n",
 "            df_csv = pd.read_csv(\n",
 "                Path(\"csvs\", csv.csv_file), keep_default_na=False, low_memory=False\n",
 "            )\n",
 "            # `drop` and `extract` hold pipe-separated column names; empty entries are skipped\n",
 "            for col in csv.drop.split(\"|\"):\n",
 "                if col:\n",
 "                    df_csv.drop(columns=col, inplace=True)\n",
 "            for col in csv.extract.split(\"|\"):\n",
 "                if col:\n",
 "                    # Capture the text from 'http' up to the next single quote into a new\n",
 "                    # <col>_url column, then remove the source column\n",
 "                    df_csv[f\"{col}_url\"] = df_csv[col].str.extract(\n",
 "                        r\"(http.*?)'\", expand=False\n",
 "                    )\n",
 "                    df_csv.drop(columns=col, inplace=True)\n",
 "            table.insert_all(df_csv.to_dict(\"records\"))\n",
 "            # Skip full-text indexing when no index columns are listed, instead of passing\n",
 "            # an empty column name to enable_fts\n",
 "            cols_to_index = [col for col in csv.index.split(\"|\") if col]\n",
 "            if cols_to_index:\n",
 "                table.enable_fts(cols_to_index)\n",
 "    finally:\n",
 "        conn.close()\n",
 "\n",
 "with Path(\"metadata.json\").open(\"w\") as json_file:\n",
 "    json_file.write(json.dumps(metadata))"
 ] }, { "cell_type": "code", "execution_count": 217, "id": "alike-carrier", "metadata": {}, "outputs": [], "source": [ "with Path(\"metadata.json\").open(\"w\") as json_file:\n", " json_file.write(json.dumps(metadata))" ] }, { "cell_type": "code", "execution_count": 12, "id": "revised-harris", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'history-trust-of-south-australia.db libraries-tasmania.db nsw-state-archives.db public-records-office-victoria.db queensland-state-archives.db state-library-of-queensland.db state-library-of-south-australia.db state-library-of-western-australia.db state-records-office-of-western-australia.db'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\" \".join(\n", " sorted(\n", " [p.name for p in Path(\"/Volumes/Workspace/mycode/ozglam-data/src\").glob(\"*.db\")]\n", " )\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.9 64-bit", 
"language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.9" }, "vscode": { "interpreter": { "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" } }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": {}, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 5 }