{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Summarise index details\n", "\n", "This notebook counts the number of rows in each index and calculates the total for the whole repository. It formats the results in nice HTML and Markdown tables for easy browsing." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "tags": [] }, "outputs": [], "source": [ "from urllib.parse import urljoin\n", "\n", "import pandas as pd\n", "from IPython.display import HTML, display\n", "from tabulate import tabulate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Add links and totals to the list of indexes" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "tags": [] }, "outputs": [], "source": [ "# Load the index data\n", "df = pd.read_csv(\"indexes.csv\").sort_values(by=\"title\")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "tags": [] }, "outputs": [], "source": [ "def make_download_link(url):\n", " \"\"\"\n", " Create a link to download the CSV file from GitHub\n", " \"\"\"\n", " slug = url.strip(\"/\").split(\"/\")[-1]\n", " filename = f\"{slug}.csv\"\n", " url = urljoin(\n", " \"https://media.githubusercontent.com/media/wragge/srnsw-indexes/master/data/\",\n", " filename,\n", " )\n", " link = 'CSV file'.format(url)\n", " return link\n", "\n", "\n", "# Create a HTML link to the index data on the NSWSA site\n", "df[\"web\"] = df[\"url\"].apply(lambda x: 'Browse index'.format(x))\n", "\n", "# Create a HTML link to download the CSV file from GitHub\n", "df[\"download\"] = df[\"url\"].apply(lambda x: make_download_link(x))" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "tags": [] }, "outputs": [], "source": [ "def count_rows(url):\n", " \"\"\"\n", " Count the number of rows in a CSV file.\n", " \"\"\"\n", " slug = url.strip(\"/\").split(\"/\")[-1]\n", " url = urljoin(\n", " \"https://media.githubusercontent.com/media/wragge/srnsw-indexes/master/data/\",\n", " f\"{slug}.csv\",\n", " )\n", " df = pd.read_csv(url, dtype=object)\n", " return df.shape[0]\n", "\n", "\n", "# Add number of rows in the CSV\n", "df[\"rows\"] = df[\"url\"].apply(lambda x: count_rows(x))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "2481881" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# How many rows in the whole repository?\n", "df[\"rows\"].sum()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "title Deceased estates index 1880-1958\n", "url https://mhnsw.au/indexes/deceased-estates/dece...\n", "description Researching deceased estates files is a comple...\n", "category Deceased estates\n", "web \n", "\n", "Title Number of rowsDownload data View at State Archives \n", "\n", "\n", "Aboriginal People in the Register of Aboriginal Reserves 1875-1904 78CSV fileBrowse index \n", "Assisted Immigrants Index 1839-1896 200,000CSV file Browse index \n", "Australian Railway Supply Detachment 1914 65CSV file Browse index \n", "Bankruptcy index 1888-1929 30,000CSV file Browse index \n", "Bench of Magistrates Index 1788-1820 4,442CSV file Browse index \n", "Botanic Gardens and government domains employees 916CSV file Browse index\n", "Bubonic plague index 1900-1908 567CSV file Browse index \n", "Census - 1841 9,355CSV file Browse index \n", "Chemists, druggists and pharmacists index 1876-1920 2,967CSV file Browse index \n", "Child care and protection index 1817-1942 21,292CSV file Browse index \n", "Colonial (Government) Architect index 1837-1970 2,373CSV file Browse index \n", "Colonial Secretary Letters Received, 1826-1896 205,863CSV file Browse index \n", "Colonial Secretary's Papers 1788-1825 144,572CSV file Browse index \n", "Colonial Secretary's letters relating to land 1826-1856 20,000CSV file Browse index \n", "Colonial Secretary's main series of letters received 7,638CSV file Browse index \n", "Convict assignments index 1821-1825 6,156CSV file Browse index \n", "Convict exiles index 1849-1850 3,004CSV file Browse index \n", "Convict indents (digitised) index 1788-1801 20,000CSV file Browse index \n", "Convicts applications to marry 1825-1851 14,327CSV file Browse index \n", "Convicts index 1791-1873 150,000CSV file Browse index \n", "Coroners' inquests index 1796-1824 808CSV file Browse index \n", "Court of Civil Jurisdiction index 1799-1814 2,876CSV file Browse index \n", "Court of Claims (Land) index 1833-1922 2,966CSV file Browse index \n", "Crew and passengers 1828-1841 2,560CSV file Browse index \n", "Criminal court records index 1788-1833 5,028CSV file Browse index \n", "Criminal depositions (Deposition Books) index 1849-1949 117,508CSV file Browse index \n", "Criminal indictments index 1863-1919 20,000CSV file Browse index \n", "Deceased estates index 1880-1958 577,891CSV file Browse index \n", "Depasturing licenses index 1837-1851 7,449CSV file Browse index \n", "Dependent children registers 1883-1923 28,910CSV file Browse index \n", "Devonshire Street Cemetery reinterment index 9,559CSV file Browse index \n", "Divorce records index 1873-1923 21,239CSV file Browse index \n", "Fire Commissioners Personnel 3,767CSV file Browse index \n", "Gaol inmates & prisoners photos index 1870-1930 52,055CSV file Browse index \n", "Gold (auriferous) lease registers 1874-1953 60,000CSV file Browse index \n", "Indigenous colonial court cases 1788-1838 65CSV file Browse index \n", "Infirm & destitute (Government) asylums index 1880-1896 20,000CSV file Browse index \n", "Inquest index 1942-1963 45,547CSV file Browse index \n", "Insolvency index 1842-1887 23,108CSV file Browse index \n", "Intestate estates index 1821-1913 30,000CSV file Browse index \n", "Land grants and leases (registers) 1792-1865 5,627CSV file Browse index \n", "Letters re migration to NSW 1838-1857 22,771CSV file Browse index \n", "Maintenance registers - Metropolitan Children's Court 1915-1917 1,372CSV file Browse index \n", "Miscellaneous immigrants index 1828-1843 8,821CSV file Browse index \n", "NSW Government employees granted military leave 20,000CSV file Browse index \n", "NSW King’s / Queen’s Counsel appointment correspondence 2,083CSV file Browse index \n", "Naturalization index 1834-1903 9,860CSV file Browse index \n", "Nominal Roll of the First Railway Section (AIF) 416CSV file Browse index \n", "Norfolk Island special bundles index 1794-1813 216CSV file Browse index \n", "Nurses index 1926-1954 46,499CSV file Browse index \n", "Police service registers 1852-1913 20,000CSV file Browse index \n", "Port Macquarie Small Debts Register, 1845-1887 2,036CSV file Browse index \n", "Probate records - supplementary index 1790-1875 1,626CSV file Browse index \n", "Public Works Salary Registers 523CSV file Browse index \n", "Publicans' licenses index 1830-1861 20,000CSV file Browse index \n", "Quarter sessions cases 1824-1837 6,232CSV file Browse index \n", "Railway employment records 1856-1917 763CSV file Browse index \n", "Railways and Tramways Roll of Honour 1,214CSV file Browse index \n", "Register of Firms index 1903-1922 50,000CSV file Browse index \n", "School teachers' rolls 1869-1908 20,000CSV file Browse index \n", "Schools and related records 1876-1979 30,181CSV file Browse index \n", "Soldier (Closer) Settlement - Returned Soldiers Transfer files 1907-1951 9,656CSV file Browse index \n", "Soldier (Closer) Settlement transfer registers 1919-1925 4,957CSV file Browse index \n", "Soldier (Closer) settlement promotion files index 1913-1958 4,354CSV file Browse index \n", "Soldier Settlement loan files index 1906-1960 7,642CSV file Browse index \n", "Soldier Settlement miscellaneous files index 1916 1,050CSV file Browse index \n", "Soldier Settlement purchases index 1905-1937 9,776CSV file Browse index \n", "Squatters and graziers index 1837-1849 9,003CSV file Browse index \n", "Surveyor General's crown plans 1792-1886 5,455CSV file Browse index \n", "Surveyors' field books 1794-1860 813CSV file Browse index \n", "Surveyors’ letters 1822-1855 157CSV file Browse index \n", "Tramway employees 1879-1911 10,606CSV file Browse index \n", "Unassisted immigrants index 1842-1855 140,000CSV file Browse index \n", "Unemployed in Sydney 1866 3,222CSV file Browse index \n", "Vessels arrived in Sydney 1837-1925 129,999CSV file Browse index \n", "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Select the columns that we want\n", "columns = df[[\"title\", \"rows\", \"download\", \"web\"]]\n", "\n", "# Create a list of headers\n", "headers = [\"Title\", \"Number of rows\", \"Download data\", \"View at State Archives\"]\n", "\n", "# Use Tabulate to generate a HTML table\n", "display(\n", " HTML(\n", " tabulate(\n", " columns, headers=headers, showindex=False, tablefmt=\"unsafehtml\", intfmt=\",\"\n", " )\n", " )\n", ")\n", "\n", "# Write a GitHub Markdown formatted version of the table to a file\n", "with open(\"indexes.md\", \"w\") as md_file:\n", " md_file.write(\n", " tabulate(\n", " columns, headers=headers, showindex=False, tablefmt=\"github\", intfmt=\",\"\n", " )\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "----\n", "\n", "Created by [Tim Sherratt](https://timsherratt.org/) for the [GLAM Workbench](https://glam-workbench.net/) project." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.2" } }, "nbformat": 4, "nbformat_minor": 4 }