{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Harvest recently digitised files from RecordSearch\n", "\n", "This notebook scrapes data from the 'Newly scanned records' section of [RecordSearch](https://recordsearch.naa.gov.au/), creating a list of recently digitised files. I ran this code on 27 March 2021 to generate [a dataset](data/recently-digitised-20210327.csv) containing files that had been digitised in the previous month.\n", "\n", "The 'Newly scanned records' only display a month's worth of additions. However, I've modified the code below to create a 'git scraper' that uses GitHub actions to run the harvester every Sunday, saving a list of the files digitised in the previous week into a [public repository](https://github.com/wragge/naa-recently-digitised). Over time, this should build up a more complete record of the digitisation process.\n", "\n", "It took me a while to figure out how the pagination worked in the 'Newly scanned records' site. As you can see below, it's a matter of adding inputs to the main navigation form that mimic a click on the page navigation buttons. Screen scraping is such fun... 
😬" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import what we need" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import re\n", "import time\n", "from pathlib import Path\n", "\n", "import altair as alt\n", "import arrow\n", "import mechanicalsoup\n", "import pandas as pd\n", "from recordsearch_data_scraper.scrapers import RSSeries\n", "from tqdm.auto import tqdm" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define some functions" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def initialise_browser():\n", " \"\"\"\n", " This is necessary to get an active session in RS.\n", " \"\"\"\n", " browser = mechanicalsoup.StatefulBrowser()\n", " browser.open(\"https://recordsearch.naa.gov.au/scripts/Logon.asp?N=guest\")\n", " # As of Jan 2023 these lines don't seem necessary and cause a LinkNotFound error\n", " # browser.select_form('form[id=\"t\"]')\n", " # browser.submit_selected()\n", " return browser\n", "\n", "\n", "def get_date_digitised(result):\n", " \"\"\"\n", " Generate a formatted date from the date digitised string (eg 'Digitised 1 days ago').\n", " It does this by getting today's date then subtracting the interval.\n", " It's possible this might not always be accurate...\n", " \"\"\"\n", " # Get the string describing when the record was digitised\n", " when_digitised = result.find(\n", " \"div\", class_=\"card-footer card-footer-list\"\n", " ).span.string.strip()\n", "\n", " # Extract out the time interval and unit\n", " interval, unit = re.search(\n", " r\"^Digitised (\\d+) (minutes|hours|days) ago\", when_digitised\n", " ).groups()\n", "\n", " # Subtract interval from today's date\n", " if unit == \"minutes\":\n", " date_digitised = arrow.now(\"Australia/Sydney\").shift(minutes=-(int(interval)))\n", " elif unit == \"days\":\n", " date_digitised = arrow.now(\"Australia/Sydney\").shift(days=-(int(interval)))\n", " elif unit == 
\"hours\":\n", " date_digitised = arrow.now(\"Australia/Sydney\").shift(hours=-(int(interval)))\n", "\n", " # ISO format the result\n", " return date_digitised.format(\"YYYY-MM-DD\")\n", "\n", "\n", "def get_records_from_page(page, pbar):\n", " \"\"\"\n", " Scrapes item metadata from the list of results.\n", " \"\"\"\n", " records = []\n", "\n", " # Get the list of results\n", " results = page.find_all(\"li\", class_=\"soda_list\")\n", "\n", " # Loop through the results, extracting the metadata\n", " for result in results:\n", " record = {}\n", " record[\"title\"] = result.img[\"title\"]\n", " record[\"item_id\"] = (\n", " result.find(\"dt\", string=\"Item ID:\")\n", " .find_next_sibling(\"dd\")\n", " .a.string.strip()\n", " )\n", " record[\"series\"] = (\n", " result.find(\"dt\", string=\"Series:\").find_next_sibling(\"dd\").a.string.strip()\n", " )\n", " record[\"control_symbol\"] = (\n", " result.find(\"dt\", string=re.compile(\"Control symbol:\"))\n", " .find_next_sibling(\"dd\")\n", " .string.strip()\n", " )\n", " record[\"date_range\"] = re.sub(\n", " r\"\\s+\",\n", " \" \",\n", " result.find(\"dt\", string=re.compile(\"Date range:\"))\n", " .find_next_sibling(\"dd\")\n", " .string.strip(),\n", " )\n", " record[\"date_digitised\"] = get_date_digitised(result)\n", " records.append(record)\n", " pbar.update(len(records))\n", " return records\n", "\n", "\n", "def get_number_of_results(page):\n", " \"\"\"\n", " Get the start, end, and total number of results from the current page of results.\n", " \"\"\"\n", " result_summary = page.find(\n", " \"label\", id=\"ContentPlaceHolderSNR_lblTopPaging\"\n", " ).string.strip()\n", " start, end, total = re.search(r\"(\\d+) to (\\d+) of (\\d+)\", result_summary).groups()\n", " return (start, end, total)\n", "\n", "\n", "def harvest_recently_digitised():\n", " records = []\n", "\n", " # Get a browser with all RecordSearch's session stuff ready\n", " browser = initialise_browser()\n", "\n", " # Open the recently digitised 
page\n", " browser.open(\n", " \"https://recordsearch.naa.gov.au/SearchNRetrieve/Interface/ListingReports/NewlyScannedList.aspx\"\n", " )\n", "\n", " # CONFIGURE THE RESULTS FORM\n", " browser.select_form('form[id=\"formSNRMaster\"]')\n", "\n", " # Get 200 results per page\n", " browser[\"ctl00$ContentPlaceHolderSNR$ddlResultsPerPage\"] = \"200\"\n", "\n", " # Get results from the past month. Other options are 'w' (week) and 'f' (fortnight).\n", " browser[\"ctl00$ContentPlaceHolderSNR$ddlDateAdded\"] = \"m\"\n", "\n", " # Set display to list view\n", " # Setting these mimics a click on the List View button\n", " browser.form.set(\"ctl00$ContentPlaceHolderSNR$btn_viewList.x\", \"11\", force=True)\n", " browser.form.set(\"ctl00$ContentPlaceHolderSNR$btn_viewList.y\", \"9\", force=True)\n", " browser.submit_selected()\n", "\n", " # PROCESS RESULTS\n", " # Get the total number of results\n", " start, end, total = get_number_of_results(browser.page)\n", "\n", " with tqdm(total=int(total)) as pbar:\n", "\n", " # Process first page of results\n", " records += get_records_from_page(browser.page, pbar)\n", "\n", " # Loop through the rest of the results set\n", " while end != total:\n", " browser.select_form('form[id=\"formSNRMaster\"]')\n", "\n", " # Setting these and submitting the form retrieves th next page of results\n", " # Basically they mimic a click on the page navigation buttons\n", " browser.form.set(\n", " \"ctl00$ContentPlaceHolderSNR$listPagerTop$ctl00$ctl02.x\",\n", " \"10\",\n", " force=True,\n", " )\n", " browser.form.set(\n", " \"ctl00$ContentPlaceHolderSNR$listPagerTop$ctl00$ctl02.y\",\n", " \"10\",\n", " force=True,\n", " )\n", " browser.submit_selected()\n", "\n", " start, end, total = get_number_of_results(browser.page)\n", " records += get_records_from_page(browser.page, pbar)\n", " time.sleep(1)\n", " return records" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Run the harvest" ] }, { "cell_type": "code", "execution_count": null, 
"metadata": { "tags": [ "nbval-skip" ] }, "outputs": [], "source": [ "records = harvest_recently_digitised()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Convert the results to a DataFrame" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "tags": [ "nbval-skip" ] }, "outputs": [], "source": [ "df_records = pd.DataFrame(records)\n", "df_records.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Find titles of series listed in the dataset\n", "\n", "The dataset only includes the series identifiers. To make it a bit more useful, we can retrieve the title of each series and add this to the dataset.\n", "\n", "First we extract a list of unique series identifiers from the dataset, then loop through it, grabbing the series details using my RecordSearch tools library." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "tags": [ "nbval-skip" ] }, "outputs": [], "source": [ "series_titles = []\n", "\n", "# Loop through the list of series ids\n", "for s in tqdm(list(df_records[\"series\"].unique())):\n", "\n", " # Get the summary details from each series\n", " # Note that this includes more information than the title which could be added into the dataset if you wanted (eg location)\n", " details = RSSeries(\n", " s, include_number_digitised=False, include_access_status=False\n", " ).data\n", "\n", " # Add the titles and ids to a new list\n", " try:\n", " series_titles.append({\"series\": s, \"series_title\": details[\"title\"]})\n", " except KeyError:\n", " print(details)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Then we can convert the series titles into a dataframe and merge it with the records dataframe to create a new dataframe that includes the titles." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "tags": [ "nbval-skip" ] }, "outputs": [], "source": [ "df_series = pd.DataFrame(series_titles)\n", "\n", "# Merge the dataframes on the `series` column\n", "df = df_records.merge(df_series, on=\"series\")\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Save the results to a CSV file" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "tags": [ "nbval-skip" ] }, "outputs": [], "source": [ "df.to_csv(\n", " Path(\"data\", f'recently-digitised-{arrow.now().format(\"YYYYMMDD\")}.csv'),\n", " index=False,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Which series do the digitised files come from?\n", "\n", "Let's get a list of the series that appear most often in the dataset." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Reload previously harvested file if necessary\n", "df = pd.read_csv(\"data/recently-digitised-20210327.csv\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seriesseries_titlecount
0B884Citizen Military Forces Personnel Dossiers, 19...20382
1A9301RAAF Personnel files of Non-Commissioned Offic...1207
2B883Second Australian Imperial Force Personnel Dos...515
3A10605Personnel Occurrence Reports396
4A6135Photographic colour transparencies positives, ...226
5D4881Alien registration cards, alphabetical series66
6MP367/1General correspondence files48
7A9300RAAF Officers Personnel files, 1921-194847
8A12372RAAF Personnel files - All Ranks [Main corresp...40
9BP5/2Drawings of inventions for letters patent, sin...37
10B78Alien registration documents36
11A2478Non-British European migrant selection documents34
12MP84/1Correspondence files, multiple number series30
13BP371/1Correspondence registration booklets and cards26
14A705Correspondence files, multiple number (Melbour...25
15A471Courts-Martial files [including war crimes tri...23
16J3111Queensland post office history files, alphabet...23
17J3109Historic photographic collection assembled by ...19
18BP8/1Mail service (contract) files, either annual s...19
19A13860Medical Documents - Army (Department of Defenc...19
20SP908/1Application for Registration of Aliens (other ...18
21A446Correspondence files, annual single number ser...18
22J539Correspondence files, multiple number series.16
23A1877British migrants - Selection documents for fre...14
24J26Medical case files, single number series with ...13
\n", "
" ], "text/plain": [ " series series_title count\n", "0 B884 Citizen Military Forces Personnel Dossiers, 19... 20382\n", "1 A9301 RAAF Personnel files of Non-Commissioned Offic... 1207\n", "2 B883 Second Australian Imperial Force Personnel Dos... 515\n", "3 A10605 Personnel Occurrence Reports 396\n", "4 A6135 Photographic colour transparencies positives, ... 226\n", "5 D4881 Alien registration cards, alphabetical series 66\n", "6 MP367/1 General correspondence files 48\n", "7 A9300 RAAF Officers Personnel files, 1921-1948 47\n", "8 A12372 RAAF Personnel files - All Ranks [Main corresp... 40\n", "9 BP5/2 Drawings of inventions for letters patent, sin... 37\n", "10 B78 Alien registration documents 36\n", "11 A2478 Non-British European migrant selection documents 34\n", "12 MP84/1 Correspondence files, multiple number series 30\n", "13 BP371/1 Correspondence registration booklets and cards 26\n", "14 A705 Correspondence files, multiple number (Melbour... 25\n", "15 A471 Courts-Martial files [including war crimes tri... 23\n", "16 J3111 Queensland post office history files, alphabet... 23\n", "17 J3109 Historic photographic collection assembled by ... 19\n", "18 BP8/1 Mail service (contract) files, either annual s... 19\n", "19 A13860 Medical Documents - Army (Department of Defenc... 19\n", "20 SP908/1 Application for Registration of Aliens (other ... 18\n", "21 A446 Correspondence files, annual single number ser... 18\n", "22 J539 Correspondence files, multiple number series. 16\n", "23 A1877 British migrants - Selection documents for fre... 14\n", "24 J26 Medical case files, single number series with ... 
13" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "series = df.value_counts([\"series\", \"series_title\"]).to_frame().reset_index()\n", "series.columns = [\"series\", \"series_title\", \"count\"]\n", "series[:25]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can see that most of the files come from just four series containing military service records. This reflects the NAA's [current digitisation priorities](https://www.naa.gov.au/about-us/media-and-publications/media-releases/national-archives-signs-contracts-worth-44m-digitise-second-world-war-service-records)." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## The long tail\n", "\n", "Let's go to the other end of the dataset and look at the series that appear 20 or fewer times." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seriesseries_titlecount
17J3109Historic photographic collection assembled by ...19
18BP8/1Mail service (contract) files, either annual s...19
19A13860Medical Documents - Army (Department of Defenc...19
20SP908/1Application for Registration of Aliens (other ...18
21A446Correspondence files, annual single number ser...18
............
369BP190/4'RT' series rifle range tenure correspondence ...1
370BP242/1Correspondence files relating to national secu...1
371BP25/1Alien registration papers, alphabetical series...1
372BP460/3Main Trust files annual single number series1
373C424General correspondence files, annual single nu...1
\n", "

357 rows × 3 columns

\n", "
" ], "text/plain": [ " series series_title count\n", "17 J3109 Historic photographic collection assembled by ... 19\n", "18 BP8/1 Mail service (contract) files, either annual s... 19\n", "19 A13860 Medical Documents - Army (Department of Defenc... 19\n", "20 SP908/1 Application for Registration of Aliens (other ... 18\n", "21 A446 Correspondence files, annual single number ser... 18\n", ".. ... ... ...\n", "369 BP190/4 'RT' series rifle range tenure correspondence ... 1\n", "370 BP242/1 Correspondence files relating to national secu... 1\n", "371 BP25/1 Alien registration papers, alphabetical series... 1\n", "372 BP460/3 Main Trust files annual single number series 1\n", "373 C424 General correspondence files, annual single nu... 1\n", "\n", "[357 rows x 3 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "series.loc[series[\"count\"] < 21]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seriesseries_titlecount
164ST1233/1Investigation files, single number series with...1
165K26Personal case files, single number series with...1
166J992Mail Service files, North Queensland, single n...1
167K60Personal case files, single number with 'M' an...1
168K269Inward passenger manifests for ships and aircr...1
............
369BP190/4'RT' series rifle range tenure correspondence ...1
370BP242/1Correspondence files relating to national secu...1
371BP25/1Alien registration papers, alphabetical series...1
372BP460/3Main Trust files annual single number series1
373C424General correspondence files, annual single nu...1
\n", "

210 rows × 3 columns


" ], "text/plain": [ " series series_title count\n", "164 ST1233/1 Investigation files, single number series with... 1\n", "165 K26 Personal case files, single number series with... 1\n", "166 J992 Mail Service files, North Queensland, single n... 1\n", "167 K60 Personal case files, single number with 'M' an... 1\n", "168 K269 Inward passenger manifests for ships and aircr... 1\n", ".. ... ... ...\n", "369 BP190/4 'RT' series rifle range tenure correspondence ... 1\n", "370 BP242/1 Correspondence files relating to national secu... 1\n", "371 BP25/1 Alien registration papers, alphabetical series... 1\n", "372 BP460/3 Main Trust files annual single number series 1\n", "373 C424 General correspondence files, annual single nu... 1\n", "\n", "[210 rows x 3 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "series.loc[series[\"count\"] == 1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So 357 of 374 series (that's 95%) appear 20 or fewer times. That's a classic 'long tail', and presumably reflects the diversity of interests that fuel 'digitisation on demand' requests. But this really needs more analysis.\n", "\n", "We can visualise the long tail by using a logarithmic scale to display the count. You'll see that most series only have one digitised file (mouse over the bars for series details)." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "alt.Chart(series).mark_bar(size=2).encode(\n", " x=alt.X(\"series\", sort=\"-y\", axis=alt.Axis(labels=False, ticks=False)),\n", " y=alt.Y(\"count\", scale=alt.Scale(type=\"symlog\")),\n", " tooltip=[\"series\", \"series_title\", \"count\"],\n", ").properties(width=800)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Other possibilities\n", "\n", "Once I've accumulated a longer record of digitisation it'll be interesting to see how things change over time. It would also be possible to use my RecordSearch Tools to find out how many pages there are in each digitised file.\n", "\n", "Of course you could do other things with this data, such as setting up an RSS feed for updates, or creating a Twitter bot sharing recently-digitised files. Hmmm..." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "----\n", "\n", "Created by [Tim Sherratt](https://timsherratt.org) for the [GLAM Workbench](https://glam-workbench.github.io/), 2021.\n", "\n", "[Sponsor me on GitHub!](https://github.com/sponsors/wragge/)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { "0ade3017fe974a7792ebe48b5b0a0282": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": {} }, "179208d1116942f49bb2f57db2809317": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": {} }, "1ce5f2d2f9854bfd8aeb06a098ce63fe": { "model_module": "@jupyter-widgets/controls", 
"model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "children": [ "IPY_MODEL_2ea3513ed6a7479f9d39b213d5284c77", "IPY_MODEL_48798f19312d4ea38b188ba0f1400049", "IPY_MODEL_97fe65a5a70b445f8ad360485d70a712" ], "layout": "IPY_MODEL_179208d1116942f49bb2f57db2809317" } }, "2562bb2ce4ca4c2aa30ae1f0ec3e6417": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "layout": "IPY_MODEL_cc564e7b87a04fcdbff83908a05c21b8", "style": "IPY_MODEL_53b16c8097704618a0214465fef23711", "value": "100%" } }, "2a9a2f508a334d2c8269114997433163": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "description_width": "" } }, "2ea3513ed6a7479f9d39b213d5284c77": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "layout": "IPY_MODEL_0ade3017fe974a7792ebe48b5b0a0282", "style": "IPY_MODEL_f83b7fc2902f48faaa7985aad4b23312", "value": "100%" } }, "4055e5dadd854acea3f0ccdf6e954219": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": {} }, "48798f19312d4ea38b188ba0f1400049": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "bar_style": "success", "layout": "IPY_MODEL_4055e5dadd854acea3f0ccdf6e954219", "max": 347, "style": "IPY_MODEL_6ec0bb2e05764a2ea5c248cb5c2f8991", "value": 347 } }, "53b16c8097704618a0214465fef23711": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "description_width": "" } }, "6ec0bb2e05764a2ea5c248cb5c2f8991": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "description_width": "" } }, "905e3def5925477ba18d1ce878250b5f": { "model_module": 
"@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": {} }, "9296dca23e1a43eb9d7fc0f96f1d3412": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "bar_style": "success", "layout": "IPY_MODEL_d3940ebda558487eb06b77753a907421", "max": 10766, "style": "IPY_MODEL_c5849274da8e43039c3290c5d32fb5fd", "value": 10766 } }, "97fe65a5a70b445f8ad360485d70a712": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "layout": "IPY_MODEL_de1d2d6bb707426ba741e60cf8d158e2", "style": "IPY_MODEL_2a9a2f508a334d2c8269114997433163", "value": " 347/347 [03:40<00:00, 1.60it/s]" } }, "aa3b8962de6a4e678995858ed88d9cd0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "layout": "IPY_MODEL_d268e42338414a668dd2f9c31e0e2322", "style": "IPY_MODEL_f51ab7fe7e0e41e2a8fca1a1d45bf367", "value": " 10766/10766 [02:27<00:00, 68.03it/s]" } }, "c5849274da8e43039c3290c5d32fb5fd": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "description_width": "" } }, "cc564e7b87a04fcdbff83908a05c21b8": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": {} }, "d2516433a4664b1aa18e11456992f165": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "children": [ "IPY_MODEL_2562bb2ce4ca4c2aa30ae1f0ec3e6417", "IPY_MODEL_9296dca23e1a43eb9d7fc0f96f1d3412", "IPY_MODEL_aa3b8962de6a4e678995858ed88d9cd0" ], "layout": "IPY_MODEL_905e3def5925477ba18d1ce878250b5f" } }, "d268e42338414a668dd2f9c31e0e2322": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": {} }, "d3940ebda558487eb06b77753a907421": { "model_module": 
"@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": {} }, "de1d2d6bb707426ba741e60cf8d158e2": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": {} }, "f51ab7fe7e0e41e2a8fca1a1d45bf367": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "description_width": "" } }, "f83b7fc2902f48faaa7985aad4b23312": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "description_width": "" } } }, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 4 }