In [84]:
import datetime
import io
import json
import sqlite3
import zipfile
from pathlib import Path

import markdown2
import pandas as pd
import requests_cache
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from slugify import slugify
from sqlite_utils import Database

# Shared cached HTTP session. Requests over either scheme are retried up to five
# times, with backoff, when the server answers 502, 503 or 504.
retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
s = requests_cache.CachedSession()
for scheme in ("https://", "http://"):
    s.mount(scheme, HTTPAdapter(max_retries=retries))

## Save local copies of all CSV datasets

In [184]:
# Load the list of CSV resources to download. Later cells read its publisher,
# file_title, file_created, dataset_title and download_url columns.
df_csvs = pd.read_csv("glam-datasets-from-gov-portals-csvs.csv")

In [185]:
# Replace missing values with empty strings so the string slicing used to build
# file_index (file_created[:10]) does not hit a float NaN. Reassigning rather
# than mutating in place keeps this cell safe to re-run.
df_csvs = df_csvs.fillna("")

In [186]:
def _file_index(row):
    """Return "<publisher>-<file title>-<creation date>" for one row, each part slugified."""
    parts = (row["publisher"], row["file_title"], row["file_created"][:10])
    return "-".join(slugify(part) for part in parts)


# Used as the local file name (minus ".csv") for each downloaded CSV.
df_csvs["file_index"] = df_csvs.apply(_file_index, axis=1)

In [106]:
def read_csv(url, header=0, encoding=0):
    """
    Open a CSV with pandas, retrying with fallback encoding/header settings.

    ``header`` and ``encoding`` are 1-based positions in the fallback lists
    below; 0 means "leave the pandas default". A UnicodeDecodeError moves on to
    the next encoding and a ParserError to the next header option; the error is
    re-raised once the relevant list is exhausted.
    """
    encodings = ["ISO-8859-1", "latin-1"]
    headers = [None]
    # Shared by every attempt: sniff the delimiter, treat "-" and " " as missing.
    options = {"sep": None, "engine": "python", "na_values": ["-", " "]}
    if encoding > 0:
        options["encoding"] = encodings[encoding - 1]
    if header > 0:
        options["header"] = headers[header - 1]
    try:
        return pd.read_csv(url, **options)
    except UnicodeDecodeError:
        if encoding == len(encodings):
            raise
        return read_csv(url=url, header=header, encoding=encoding + 1)
    except pd.errors.ParserError:
        if header == len(headers):
            raise
        return read_csv(url=url, header=header + 1, encoding=encoding)


# Download every listed CSV into the local "csvs" directory as "<file_index>.csv".
Path("csvs").mkdir(exist_ok=True)
for csv in df_csvs.itertuples():
    try:
        response = s.get(csv.download_url)
        # Must be *called*: the bare attribute reference never checked the status.
        response.raise_for_status()
    except Exception:
        # Report the failed dataset and skip it, so a missing response (or one
        # left over from the previous iteration) is never written under this name.
        print(csv.dataset_title)
        continue
    # Explicit UTF-8 so the file does not depend on the platform default encoding.
    with Path("csvs", f"{csv.file_index}.csv").open("w", encoding="utf-8") as csv_file:
        csv_file.write(response.text)

PROV Digitisation Program statistics
PROV Workforce Data 19-20
PROV Annual Report - Records Issued & Visitor Statistics 2015-2016


## Create a list of datasets for index checking

In [189]:
# Collect one record per downloaded CSV: its catalogue details plus column names.
# Files that can't be matched or parsed are reported and left out. A parse error
# means the contents aren't really a CSV file -- delete those files.
dfs = []
detail_fields = ["publisher", "info_url", "file_title", "file_modified"]
for csv in Path("csvs").glob("*.csv"):
    # Files were saved as "<file_index>.csv", so the stem is the lookup key.
    file_index = csv.stem
    matches = df_csvs.loc[df_csvs["file_index"] == file_index, detail_fields]
    if matches.empty:
        print(f"No details -- {file_index}")
        continue
    details = matches.iloc[0].to_dict()
    details["csv_file"] = csv.name
    try:
        # Read the whole file (not just the header) so malformed rows are caught.
        df_csv = pd.read_csv(csv, low_memory=False)
    except Exception:
        # Exception rather than a bare except, so Ctrl-C still stops the loop.
        print(f"Error -- {file_index}")
        continue
    details["columns"] = "|".join(list(df_csv.columns))
    dfs.append(details)
df = pd.DataFrame(dfs)

No details -- history-trust-of-sa-suffrage-petition
Error -- queensland-state-archives-corporate-school-files-works-facilities-works-establishment-files-1871-1998-2018-02-23
Error -- state-library-of-south-australia-fire-insurance-maps-1911-1914-2014-06-22
No details -- public-records-office-victoria-outwards-passengers-from-victoria-1852-1915-2014-08-01
No details -- history-trust-of-sa-passengers-in-history
Error -- south-australian-museum-consultants-2017-18-2019-08-15
Error -- nsw-state-archives-railway-employment-records-2014-09-30
Error -- state-library-of-south-australia-19th-century-photographs-by-ernest-gall-2014-06-10
Error -- queensland-museum-queensland-museum-collection-of-ethnographic-object-records-2014-06-25
Error -- state-library-of-south-australia-bradman-collection-2013-11-18
Error -- nsw-state-archives-nsw-govt-railways-and-tramways-roll-of-honour-1914-1919-csv-2014-09-30
Error -- queensland-museum-queensland-museum-collection-of-historical-object-records-2014-06-25

In [190]:
# Preview the collected file details.
# NOTE(review): the saved output below lists columns (dataset_title, author, ...)
# that the previous cell does not produce -- it looks stale; re-run to confirm.
df

Unnamed: 0,dataset_title,publisher,author,dataset_issued,dataset_modified,dataset_description,source,info_url,start_date,end_date,...,download_url,format,file_description,file_created,file_modified,file_size,licence,file_index,csv_file,columns
0,State Library of Queensland - Real estate maps,State Library of Queensland,opendata@slq.qld.gov.au,2012-12-07T06:05:16.640302,2020-12-09T05:55:15.871780,A unique collection of original maps and plans...,data.qld.gov.au,https://data.qld.gov.au/dataset/959d611f-a9cf-...,,,...,https://www.data.qld.gov.au/dataset/959d611f-a...,CSV,This updated dataset includes links to 798 dig...,2018-02-28T04:50:33.127516,2019-08-19T06:18:57.312772,252416,Creative Commons Attribution 4.0,state-library-of-queensland-real-estate-maps-f...,state-library-of-queensland-real-estate-maps-f...,Title|Description|Lat|Lon|Link|ID
1,Passport registers 1926 to 1939,Queensland State Archives,web@archives.qld.gov.au,2013-10-14T06:10:08.409229,2022-06-20T23:00:36.801163,These indexes were compiled from the passport ...,data.qld.gov.au,https://data.qld.gov.au/dataset/fc87f25a-dc02-...,,,...,https://www.data.qld.gov.au/dataset/fc87f25a-d...,CSV,This open data file lists the names of immigra...,2017-01-11T23:47:35.449465,2022-01-10T04:53:27.827980,2831155,Creative Commons Attribution 4.0,queensland-state-archives-passport-clearances-...,queensland-state-archives-passport-clearances-...,Last name|Given names|Notes|Date of arrival|Ye...
2,Assisted immigration 1848 to 1912,Queensland State Archives,web@archives.qld.gov.au,2013-03-04T06:34:34.270023,2022-06-20T12:57:24.964249,These indexes were created from the [Registers...,data.qld.gov.au,https://data.qld.gov.au/dataset/ba182873-e8a7-...,,,...,https://www.data.qld.gov.au/dataset/ba182873-e...,CSV,This open data file lists the names of assiste...,2013-03-05T23:30:57.308546,2022-06-14T07:46:06.234434,2621440,Creative Commons Attribution 4.0,queensland-state-archives-assisted-immigration...,queensland-state-archives-assisted-immigration...,Last name|Given names|Notes|Age|Ship|Date|Year...
3,Australian South Sea Islanders 1867 to 1908,Queensland State Archives,web@archives.qld.gov.au,2014-06-25T04:29:57.438596,2022-06-20T13:07:35.777233,This index was compiled from a wide variety of...,data.qld.gov.au,https://data.qld.gov.au/dataset/eae0afa9-681c-...,,,...,https://www.data.qld.gov.au/dataset/eae0afa9-6...,CSV,This open data file lists the names (L-Z) of A...,2017-01-11T01:32:27.747955,2017-01-11T01:32:27.556535,13107200,Creative Commons Attribution 4.0,queensland-state-archives-australian-south-sea...,queensland-state-archives-australian-south-sea...,Last name|Given name/s|Page|Date|Ref|Prev sys ...
4,Queensland Museum collection of protozoan spec...,Queensland Museum,opendata@qm.qld.gov.au,2014-02-18T23:18:45.102073,2019-07-10T16:42:34.524484,A list of specimens of protozoan species in Qu...,data.qld.gov.au,https://data.qld.gov.au/dataset/4f1071f2-f4fa-...,,,...,http://www.qm.qld.gov.au/microsites/data/proto...,CSV,A CSV file containing records of all protozoan...,2014-02-18T23:19:05.331656,2017-06-23T00:00:00,41733324,Creative Commons Attribution 4.0,queensland-museum-queensland-museum-protozoan-...,queensland-museum-queensland-museum-protozoan-...,dcterms:type|dcterms:modified|dcterms:language...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723,SA FOI – number of fee waiver or reduction by ...,State Records South Australia,State Records,2018-02-02T04:38:06.752608,2018-12-23T01:22:00.630016,2017-18 annual reporting data on the number of...,data.sa.gov.au,https://data.sa.gov.au/data/dataset/f923f9b0-b...,2012-07-01,2018-06-30,...,https://data.sa.gov.au/data/dataset/f923f9b0-b...,CSV,2017-18 annual reporting data on the number of...,2018-02-02T15:38:27.930264,2018-12-23T01:22:00.608148,,Creative Commons Attribution,state-records-south-australia-sa-foi-number-of...,state-records-south-australia-sa-foi-number-of...,"Reason for fee waiver, by sector|No. of waiver..."
724,State Library of Queensland - Catalogue searches,State Library of Queensland,opendata@slq.qld.gov.au,2012-12-07T05:55:14.502123,2021-03-08T07:42:00.611055,This open data file contains the text strings ...,data.qld.gov.au,https://data.qld.gov.au/dataset/cebb997c-1c42-...,,,...,https://www.data.qld.gov.au/dataset/cebb997c-1...,CSV,The text strings searched and count of recurri...,2019-06-18T06:37:31.010964,2019-08-27T01:13:09.674578,104448,Creative Commons Attribution 4.0,state-library-of-queensland-july-2017-catalogu...,state-library-of-queensland-july-2017-catalogu...,Search strings|Count
725,World War I Soldiers and Nurses (1914-1928).,Libraries Tasmania,Libraries Tasmania,2015-06-15T03:04:09.056176,2021-11-23T14:36:42.489452,"Photographs, articles and applications for lan...",data.gov.au,https://data.gov.au/dataset/b711231a-2a02-48eb...,1914,1928,...,https://data.gov.au/data/dataset/b711231a-2a02...,CSV,,2016-03-22T10:01:58.539607,2021-11-23,2835528,Creative Commons Attribution 4.0 International,libraries-tasmania-world-war-one-tasmanian-pho...,libraries-tasmania-world-war-one-tasmanian-pho...,DIGITAL_OBJECT - URL_TEXT|DIGITAL_OBJECT - URL...
726,"Deceased Estate Files, 1880-1923",NSW State Archives,State Records Authority,2014-09-30T04:52:48.805972,2016-07-20T12:09:20.785878,Researching deceased estates files before 1923...,data.nsw.gov.au,https://data.nsw.gov.au/data/dataset/5d45437c-...,,,...,https://data.nsw.gov.au/data/dataset/5d45437c-...,CSV,This dataset contains the following attributes...,2014-09-30T00:55:53.313012,,,Creative Commons Attribution,nsw-state-archives-deceased-estates-2014-09-30,nsw-state-archives-deceased-estates-2014-09-30...,Surname|FirstName|Locality|DateOfDeath|DateDut...


In [109]:
# Save the list for manual index checking, stamped with today's date (YYYYMMDD).
# The date is rendered with a format spec instead of a nested strftime('...')
# call: reusing the enclosing quote inside an f-string is a SyntaxError before
# Python 3.12.
df.to_csv(f"csvs_for_indexing_{datetime.datetime.now():%Y%m%d}.csv", index=False)

## Merge checked files

In [110]:
# Load the indexing decisions recorded in the previously checked file.
# Update the date in the file name to point at the most recent checked file.
checked_fields = ["publisher", "info_url", "csv_file", "index", "drop", "extract"]
df_checked = pd.read_csv(
    "csvs_for_indexing_checked_20211018.csv", keep_default_na=False
).loc[:, checked_fields]
df_checked.head()

Unnamed: 0,publisher,info_url,csv_file,index,drop,extract
0,Australian Institute of Aboriginal and Torres ...,https://data.gov.au/dataset/11cbf24a-a31a-488c...,australian-institute-of-aboriginal-and-torres-...,,,
1,Libraries Tasmania,https://data.gov.au/dataset/b0627a17-6783-4c18...,libraries-tasmania-bankruptcy-csv-2017-07-14.csv,NAME|NAME_SEE_ALSO,,
2,Libraries Tasmania,https://data.gov.au/dataset/069a423b-abd8-4454...,libraries-tasmania-colonial-secretary-correspo...,DESC|NAME|NAME_SEE_ALSO,,
3,Libraries Tasmania,https://data.gov.au/dataset/58a9a8d7-01e0-43df...,libraries-tasmania-court-csv-2017-07-14.csv,NAME,,
4,Libraries Tasmania,https://data.gov.au/dataset/d7ec2d93-b9dd-482b...,libraries-tasmania-digitised-archives-csv-2016...,,,


In [191]:
# Attach the index/drop/extract decisions from the checked file to the latest
# harvest. The left join keeps every harvested file, matched or not.
# (An earlier version joined on publisher, info_url, file_title and file_modified.)
merge_keys = ["publisher", "info_url", "csv_file"]
df_new_check = df.merge(df_checked, how="left", on=merge_keys)

In [192]:
# Inspect the merged result before it is saved for manual checking.
df_new_check

Unnamed: 0,publisher,info_url,file_title_x,file_modified_x,csv_file,file_title_y,file_modified_y,columns,index,drop,extract
0,State Library of Queensland,https://data.qld.gov.au/dataset/959d611f-a9cf-...,Real Estate Maps February 2018,2019-08-19T06:18:57.312772,state-library-of-queensland-real-estate-maps-f...,Real Estate Maps February 2018,2019-08-19T06:18:57.313,Title|Description|Lat|Lon|Link|ID,,,
1,Queensland State Archives,https://data.qld.gov.au/dataset/fc87f25a-dc02-...,Passport clearances 1923 to 1940,2022-01-10T04:53:27.827980,queensland-state-archives-passport-clearances-...,Passport clearances 1923 to 1940,2022-01-10T04:53:27.828,Last name|Given names|Notes|Date of arrival|Ye...,Last name|Given names,Description,
2,Queensland State Archives,https://data.qld.gov.au/dataset/ba182873-e8a7-...,Assisted immigration 1848 to 1912 - A,2022-06-14T07:46:06.234434,queensland-state-archives-assisted-immigration...,Assisted immigration 1848 to 1912 - A,2022-06-14T07:46:06.234,Last name|Given names|Notes|Age|Ship|Date|Year...,,,
3,Queensland State Archives,https://data.qld.gov.au/dataset/eae0afa9-681c-...,Australian South Sea Islanders 1867 to 1908 L-Z,2017-01-11T01:32:27.556535,queensland-state-archives-australian-south-sea...,Australian South Sea Islanders 1867 to 1908 L-Z,2017-01-11T01:32:27.557,Last name|Given name/s|Page|Date|Ref|Prev sys ...,,,
4,Queensland Museum,https://data.qld.gov.au/dataset/4f1071f2-f4fa-...,Queensland Museum protozoan collection records,2017-06-23T00:00:00,queensland-museum-queensland-museum-protozoan-...,Queensland Museum protozoan collection records,2017-06-23T00:00:00,dcterms:type|dcterms:modified|dcterms:language...,,,
...,...,...,...,...,...,...,...,...,...,...,...
723,State Records South Australia,https://data.sa.gov.au/data/dataset/f923f9b0-b...,SA FOI – number of fee waiver or reduction by ...,2018-12-23T01:22:00.608148,state-records-south-australia-sa-foi-number-of...,SA FOI – number of fee waiver or reduction by ...,2018-12-23T01:22:00.608,"Reason for fee waiver, by sector|No. of waiver...",,,
724,State Library of Queensland,https://data.qld.gov.au/dataset/cebb997c-1c42-...,July 2017 Catalogue searches,2019-08-27T01:13:09.674578,state-library-of-queensland-july-2017-catalogu...,July 2017 Catalogue searches,2019-08-27T01:13:09.675,Search strings|Count,,,
725,Libraries Tasmania,https://data.gov.au/dataset/b711231a-2a02-48eb...,World War One Tasmanian Photographs - CSV,2021-11-23,libraries-tasmania-world-war-one-tasmanian-pho...,World War One Tasmanian Photographs - CSV,2021-11-23,DIGITAL_OBJECT - URL_TEXT|DIGITAL_OBJECT - URL...,NAME,,
726,NSW State Archives,https://data.nsw.gov.au/data/dataset/5d45437c-...,Deceased Estates,,nsw-state-archives-deceased-estates-2014-09-30...,Deceased Estates,,Surname|FirstName|Locality|DateOfDeath|DateDut...,,,


Manually check the file saved by the next cell to make sure that every historical file containing names has a value in the `index` column. Compare it against the list of new file titles.

In [113]:
# Save the new checking file, stamped with today's date (YYYYMMDD).
# The date is rendered with a format spec instead of a nested strftime('...')
# call: reusing the enclosing quote inside an f-string is a SyntaxError before
# Python 3.12.
df_new_check.to_csv(
    f"csvs_for_indexing_checked_{datetime.datetime.now():%Y%m%d}.csv", index=False
)

## PROV datasets are zipped

In [94]:
# Download the extra PROV indexes into "prov_csvs". ZIP resources are unpacked
# there; anything else is saved as "<file_index>.csv".
prov_csvs = pd.read_csv("extra-prov-indexes.csv")
prov_csvs["file_index"] = prov_csvs.apply(
    lambda x: f'{slugify(x["publisher"])}-{slugify(x["file_title"])}-{slugify(x["file_created"][:10])}',
    axis=1,
)
Path("prov_csvs").mkdir(exist_ok=True)
for csv in prov_csvs.itertuples():
    print(csv.dataset_title)
    response = s.get(csv.download_url)
    # Must be *called*: the bare attribute reference never checked the status.
    response.raise_for_status()
    # Reuse file_index so the saved name always matches the lookup key used
    # when the files are read back.
    csv_name = f"{csv.file_index}.csv"
    if csv.format == "ZIP":
        print(csv_name)
        try:
            # NOTE(review): extractall keeps the member names stored in the
            # archive, so the extracted files are not necessarily called
            # csv_name -- verify (or rename) before the lookup by file name.
            with zipfile.ZipFile(io.BytesIO(response.content)) as z:
                z.extractall("prov_csvs")
        except zipfile.BadZipFile:
            # Report instead of silently skipping a download that isn't a ZIP.
            print(f"Not a valid ZIP file -- {csv.download_url}")
    else:
        # Explicit UTF-8 so the file does not depend on the platform default.
        with Path("prov_csvs", csv_name).open("w", encoding="utf-8") as csv_file:
            csv_file.write(response.text)

Victorian World War One Soldier Settlers
British Assisted Passengers to Victoria 1839-1871
public-records-office-victoria-british-assisted-passengers-to-victoria-1839-1871-2014-08-01.csv
Unassisted Inward Passengers 1852-1923
public-records-office-victoria-unassisted-inward-passengers-1852-1923-2020-10-27.csv
Outwards Passengers from Victoria 1852-1915
public-records-office-victoria-outwards-passengers-from-victoria-1852-1915-2014-08-01.csv


In [95]:
# Build the PROV checking list: catalogue details plus column names per CSV.
prov_records = []
prov_detail_fields = ["publisher", "info_url", "file_title", "file_modified"]
for csv in Path("prov_csvs").glob("*.csv"):
    print(csv)
    # The file stem is expected to equal a file_index value in prov_csvs;
    # an unmatched file raises IndexError at .iloc[0].
    file_index = csv.stem
    details = (
        prov_csvs.loc[prov_csvs["file_index"] == file_index, prov_detail_fields]
        .iloc[0]
        .to_dict()
    )
    details["csv_file"] = csv.name
    df_csv = pd.read_csv(csv, low_memory=False)
    details["columns"] = "|".join(list(df_csv.columns))
    prov_records.append(details)
# DataFrame.append is deprecated (removed in pandas 2.0). Building the frame once
# from a list of records also avoids re-copying it on every iteration.
prov_df = pd.DataFrame(prov_records)

prov_csvs/public-records-office-victoria-british-assisted-passengers-to-victoria-1839-1871-2014-08-01.csv


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


prov_csvs/public-records-office-victoria-outwards-passengers-from-victoria-1852-1915-2014-08-01.csv
prov_csvs/public-records-office-victoria-victorian-world-war-one-soldier-settlers-2015-05-29.csv
prov_csvs/public-records-office-victoria-unassisted-inward-passengers-1852-1923-2020-10-27.csv


  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [178]:
# Save the PROV file list (without the row index) for manual index checking.
prov_df.to_csv("prov_csvs_for_indexing.csv", index=False)

In [None]:
# Reference example of a nested metadata layout (databases -> tables -> table
# settings). This cell has not been executed and assigns nothing.
# NOTE(review): presumably kept as a template for the `metadata` dict built in a
# later cell -- confirm, or delete this cell.
{
    "databases": {
        "database1": {
            "source": "Alternative source",
            "source_url": "http://example.com/",
            "tables": {
                "example_table": {
                    "description_html": "Custom <em>table</em> description",
                    "license": "CC BY 3.0 US",
                    "license_url": "https://creativecommons.org/licenses/by/3.0/us/",
                }
            },
        }
    }
}

In [212]:
# Gather the manually checked lists. From the main list keep only rows that have
# columns nominated for indexing; the PROV and SA lists are used in full.
df_checked = pd.read_csv("csvs_for_indexing_checked_20220809.csv", keep_default_na=False)
df_checked_filtered = df_checked.loc[df_checked["index"] != ""]
df_prov_checked = pd.read_csv("prov_csvs_for_indexing_checked.csv", keep_default_na=False)
df_sa_checked = pd.read_csv("sa_datasets_for_checking.csv", keep_default_na=False)
df_all_checked = pd.concat([df_checked_filtered, df_prov_checked, df_sa_checked])

# Full dataset catalogue, given a "<publisher>-<title>-<date>.csv" file name so
# it can be joined to the checked lists on csv_file.
df_all = pd.read_csv("glam-datasets-from-gov-portals.csv", keep_default_na=False)


def _csv_file_name(row):
    """Return the slugified "<publisher>-<file title>-<creation date>.csv" name for one row."""
    parts = (row["publisher"], row["file_title"], row["file_created"][:10])
    return "-".join(slugify(part) for part in parts) + ".csv"


df_all["csv_file"] = df_all.apply(_csv_file_name, axis=1)

# Left join: one row per checked file, enriched with its catalogue metadata.
# (An earlier version also joined on file_title and file_modified.)
df_final = df_all_checked.merge(
    df_all, how="left", on=["info_url", "publisher", "csv_file"]
)

In [213]:
# Sanity check: (rows, columns) of the merged table.
df_final.shape

(194, 25)

In [214]:
# List the generated file names for one publisher.
is_history_trust = df_all["publisher"] == "History Trust of South Australia"
df_all.loc[is_history_trust, "csv_file"].to_list()

['history-trust-of-south-australia-executive-employment-at-the-history-trust-of-south-australia-2011-2020-2018-09-03.csv',
 'history-trust-of-south-australia-consultants-engaged-by-the-history-trust-of-south-australia-2019-11-08.csv',
 'history-trust-of-south-australia-public-complaints-received-by-history-trust-of-south-australia-2018-09-03.csv',
 'history-trust-of-south-australia-fraud-detection-at-history-trust-of-south-australia-2011-2021-2018-09-03.csv',
 'history-trust-of-south-australia-whistleblowers-disclosure-for-history-trust-of-south-australia-2011-2021-2018-09-03.csv',
 'history-trust-of-south-australia-contractors-engaged-by-the-history-trust-of-south-australia-2019-11-08.csv',
 'history-trust-of-south-australia-passengers-in-history-search-index-2016-06-28.csv',
 'history-trust-of-south-australia-workplace-injury-claims-received-by-history-trust-of-south-australia-2019-20-2018-09-03.csv',
 'history-trust-of-south-australia-suffrage125-petition-2019-04-04.csv',
 'history-

In [216]:
def _split_columns(value):
    """Split a "|"-separated column list, ignoring empty entries and non-string (missing) values."""
    if not isinstance(value, str):
        return []
    return [name for name in value.split("|") if name]


# Build one SQLite database per publisher (one table per checked CSV, with
# full-text search on the nominated columns) and collect the matching metadata,
# which is written to metadata.json at the end.
# NOTE(review): rows are inserted into whatever "<publisher>.db" already exists,
# so re-running presumably duplicates data -- delete old .db files first; confirm.
metadata = {
    "title": "GLAM Name Indexes",
    "description_html": """
<p><b>Search for names across an aggregated collection of name indexes from Australian GLAM organisations.</b></p>
<p>For more information about the datasets, see the <a href="https://glam-workbench.net/glam-data-portals/">GLAM data portals</a> section of the GLAM Workbench.</p>
""",
    "databases": {},
}

for org, csvs in df_final.groupby(by="publisher"):
    org_slug = slugify(org)
    org_tables = {}
    metadata["databases"][org_slug] = {"title": org, "tables": org_tables}
    conn = sqlite3.connect(f"{org_slug}.db")
    try:
        db = Database(conn)
        for csv in csvs.itertuples():
            print(csv.csv_file)
            table_name = slugify(csv.file_title_y)

            # Only prefix the dataset title when it differs from the file title.
            if csv.dataset_title != csv.file_title_y:
                title = f"{csv.dataset_title} – {csv.file_title_y}"
            else:
                title = csv.file_title_y

            # Likewise, only append the file description when it adds something.
            description = markdown2.markdown(str(csv.dataset_description))
            if csv.dataset_description != csv.file_description:
                description += markdown2.markdown(str(csv.file_description))
            if csv.file_modified_y:
                description += f"<p>Last modified: {csv.file_modified_y}</p>"

            org_tables[table_name] = {
                "title": title,
                "description_html": description,
                "source_url": csv.download_url,
                "about_url": csv.info_url,
                "license": csv.licence,
                "searchmode": "raw",
            }

            df_csv = pd.read_csv(
                Path("csvs", csv.csv_file), keep_default_na=False, low_memory=False
            )
            # Remove the columns flagged for dropping in the checked file.
            df_csv = df_csv.drop(columns=_split_columns(csv.drop))
            # Replace each flagged column with a "<col>_url" column holding the
            # first http... substring that precedes a single quote.
            for col in _split_columns(csv.extract):
                df_csv[f"{col}_url"] = df_csv[col].str.extract(r"(http.*?)'")
                df_csv = df_csv.drop(columns=col)

            table = db[table_name]
            table.insert_all(df_csv.to_dict("records"))

            # csv.index is the value of the "index" column (the row label is
            # csv.Index). Skip FTS when no columns were nominated, rather than
            # passing an empty column name to enable_fts.
            cols_to_index = _split_columns(csv.index)
            if cols_to_index:
                table.enable_fts(cols_to_index)
            else:
                print(f"No index columns -- {csv.csv_file}")
        conn.commit()
    finally:
        # Release the database file; the connection was previously never closed.
        conn.close()

with Path("metadata.json").open("w") as json_file:
    json_file.write(json.dumps(metadata))

history-trust-of-south-australia-passengers-in-history-search-index-2016-06-28.csv
history-trust-of-south-australia-suffrage125-petition-2019-04-04.csv
libraries-tasmania-bankruptcy-csv-2017-07-14.csv
libraries-tasmania-colonial-secretary-correspondence-csv-2019-09-05.csv
libraries-tasmania-court-csv-2017-07-14.csv
libraries-tasmania-education-csv-2019-09-05.csv
libraries-tasmania-eheritage-data-csv-2017-07-17.csv
libraries-tasmania-employment-csv-2019-09-05.csv
libraries-tasmania-hotels-properties-csv-2016-03-22.csv
libraries-tasmania-land-records-csv-2021-03-23.csv
libraries-tasmania-miscellaneous-csv-2019-09-05.csv
libraries-tasmania-tasmanian-arrivals-csv-2016-03-22.csv
libraries-tasmania-tasmanian-births-csv-2016-03-22.csv
libraries-tasmania-tasmanian-census-csv-2016-03-22.csv
libraries-tasmania-tasmanian-convicts-csv-2016-03-22.csv
libraries-tasmania-tasmanian-convicts-permission-to-marry-csv-2016-03-22.csv
libraries-tasmania-tasmanian-deaths-csv-2016-03-22.csv
libraries-tasmania

In [217]:
# Serialise the collected metadata to metadata.json in the working directory.
with Path("metadata.json").open("w") as metadata_out:
    json.dump(metadata, metadata_out)

In [12]:
# List the SQLite databases in the source directory as one space-separated string.
# NOTE(review): absolute, machine-specific path -- presumably the directory the
# .db files were written to; consider a path relative to the notebook instead.
db_dir = Path("/Volumes/Workspace/mycode/ozglam-data/src")
" ".join(sorted(db_path.name for db_path in db_dir.glob("*.db")))

'history-trust-of-south-australia.db libraries-tasmania.db nsw-state-archives.db public-records-office-victoria.db queensland-state-archives.db state-library-of-queensland.db state-library-of-south-australia.db state-library-of-western-australia.db state-records-office-of-western-australia.db'