# Files digitised in the last week

Each Sunday I automatically harvest details of the files digitised by the National Archives of Australia (NAA) in the previous week. You can view the results [in this repository](https://github.com/wragge/naa-recently-digitised). This notebook analyses the most recent harvest to provide a summary of the results.

In [19]:
import datetime
from urllib.error import HTTPError

import arrow
import pandas as pd
from IPython.display import display
from recordsearch_data_scraper.scrapers import RSSeries
from tqdm.auto import tqdm

In [20]:
# Work out which Sunday's harvest should be loaded.
# Harvests run on Sundays; 2pm Sydney time is the cut-off after which the
# current Sunday's harvest is assumed to have finished.
today = arrow.now().to("Australia/Sydney")
harvest_has_run = today.weekday() == 6 and today.time() >= datetime.time(14, 0)

if harvest_has_run:
    harvest_day = today
else:
    # Jump to the coming Sunday (expected to stay on today when today is
    # already Sunday -- confirm against the arrow docs), then step back one
    # week to land on the most recent completed harvest. Reusing `today`
    # keeps the test above and the result tied to the same instant.
    harvest_day = today.shift(weekday=6).shift(weeks=-1)

print(f'Harvested on {harvest_day.format("dddd, D MMMM YYYY")}.')

Harvested on Sunday, 15 January 2023.


In [21]:
# Load the CSV for the selected harvest straight from the GitHub repository.
harvest_url = (
    "https://raw.githubusercontent.com/wragge/naa-recently-digitised/master/data/"
    f'digitised-week-ending-{harvest_day.format("YYYYMMDD")}.csv'
)

try:
    df = pd.read_csv(harvest_url)
except HTTPError as err:
    # Fall back to an empty frame so the remaining cells (which all check
    # `df.empty`) still run, but tell the reader why there is nothing to show.
    print(f"Couldn't load {harvest_url} (HTTP {err.code}) - no results to summarise.")
    df = pd.DataFrame()

In [22]:
# (rows, columns) of the loaded harvest; the empty fallback frame created when
# the download fails has shape (0, 0)
df.shape

(0, 0)

In [23]:
# Show the ten series with the most files digitised in this harvest.
# The result is passed to display() explicitly: Jupyter only auto-renders a bare
# expression when it is the last top-level statement in a cell, so an expression
# nested inside an `if` would otherwise produce no output at all.
if not df.empty:
    display(df["series"].value_counts().head(10))

In [24]:
# Summarise the harvest by series, attaching the title of each series as
# retrieved through RSSeries.
if not df.empty:
    # One lookup per distinct series in the harvest
    series_list = [*df["series"].unique()]

    cited_series = []
    for series in tqdm(series_list):
        # Only the title is read from the result; both include_* options are off
        data = RSSeries(
            series,
            include_number_digitised=False,
            include_access_status=False,
        ).data
        cited_series.append({"series": series, "series_title": data["title"]})

    # Attach the title to every harvested file via its series identifier
    df_titles = pd.merge(df, pd.DataFrame(cited_series), how="left", on="series")

    # Count files per (series, title) pair; value_counts sorts largest first
    df_titles = df_titles.value_counts(["series", "series_title"]).reset_index(
        name="total"
    )

    top_titles = df_titles[:20]

    # Widen the column display so long series titles aren't cut short
    with pd.option_context("display.max_colwidth", 100):
        display(top_titles)

    # The same top twenty as a single line of plain text
    totals = "".join(
        f"{row.series}, {row.series_title}, {row.total} files digitised; "
        for row in top_titles.itertuples()
    )
    print(totals)