# Exploring unpublished works that might be entering the public domain on 1 January 2019

For more information on the data and how it was harvested see [Finding unpublished works entering the public domain](Finding-unpublished-works-entering-public-domain.ipynb).

In [2]:
from urllib.parse import quote

import pandas as pd
import requests
from IPython.display import display, HTML

In [3]:
# Load the harvested records. keep_default_na=False stops pandas converting
# empty cells to NaN, so missing values stay as empty strings.
df = pd.read_csv(
    'unpublished_works_entering_pd_20181006.csv',
    keep_default_na=False,
)
df.head()

Unnamed: 0,creator,date,id,nuc,title,trove_url
0,"Kelly, F. S. (Frederick Septimus), 1881-1916",1893-1926,10201266,ANL,Music manuscripts,https://trove.nla.gov.au/work/10201266
1,,1863-1925,10544890,ANL,Collection of promissory notes from remote are...,https://trove.nla.gov.au/work/10544890
2,"Gugeri, Peter Anthony, 1845-1930",1863-1910,14022030,WLB,Gugeri family papers,https://trove.nla.gov.au/work/14022030
3,"Kruse, Johann Secundus, 1859-1927",1870-1927,14952244,ANL,Papers of Johann Kruse,https://trove.nla.gov.au/work/14952244
4,"Freycinet, Rose Marie de, d. 1832",1802-1927,152218670,ANL,Documents relating to Louis and Rose de Freycinet,https://trove.nla.gov.au/work/152218670


## Add institution names

In [5]:
# Look up the institution name for every distinct NUC code in the dataset
# using the Trove contributor API. Results are collected in `nucs` as a list
# of {'nuc': ..., 'institution': ...} records.
nuc_list = list(df['nuc'].unique())
nucs = []
api_url = 'http://api.trove.nla.gov.au/v2/contributor/{}/'

params = {
    'encoding': 'json',
    'key': 'ADD YOUR API KEY HERE'
}

for nuc in nuc_list:
    # Skip empty NUC values -- there is nothing to look up for them.
    if nuc:
        # A timeout stops the cell hanging forever if the API is unresponsive.
        response = requests.get(api_url.format(nuc), params=params, timeout=30)
        # Fail with a clear HTTP error (e.g. missing/invalid API key) instead of
        # an obscure JSON decoding or KeyError further down.
        response.raise_for_status()
        data = response.json()
        name = data['contributor']['name']
        nucs.append({'nuc': nuc, 'institution': name})

In [86]:
# Build a lookup table from the harvested names and attach it to each record.
# NOTE: this is an inner join (the pandas default), so records whose NUC has
# no row in nucs_df -- including records with an empty NUC -- are not carried
# over into df_new.
nucs_df = pd.DataFrame(nucs)
df_new = df.merge(nucs_df, how='inner', on='nuc')
df_new.head()

Unnamed: 0,creator,date,id,nuc,title,trove_url,institution
0,"Kelly, F. S. (Frederick Septimus), 1881-1916",1893-1926,10201266,ANL,Music manuscripts,https://trove.nla.gov.au/work/10201266,National Library of Australia.
1,,1863-1925,10544890,ANL,Collection of promissory notes from remote are...,https://trove.nla.gov.au/work/10544890,National Library of Australia.
2,"Kruse, Johann Secundus, 1859-1927",1870-1927,14952244,ANL,Papers of Johann Kruse,https://trove.nla.gov.au/work/14952244,National Library of Australia.
3,"Freycinet, Rose Marie de, d. 1832",1802-1927,152218670,ANL,Documents relating to Louis and Rose de Freycinet,https://trove.nla.gov.au/work/152218670,National Library of Australia.
4,"Dickens, Charles, 1812-1870",1869,168122181,ANL,Cheque written by Charles Dickens,https://trove.nla.gov.au/work/168122181,National Library of Australia.


## Top 50 institutions

In [87]:
# Count records per institution, largest first, and show the top 50.
records_per_institution = df_new.groupby(['nuc', 'institution']).size()
institutions = (
    records_per_institution
    .sort_values(ascending=False)
    .reset_index(name='count')
)
institutions.head(50)

Unnamed: 0,nuc,institution,count
0,QSA,Queensland State Archives.,2965
1,TSL,Libraries Tasmania.,2112
2,ANL,National Library of Australia.,1781
3,VSL,State Library Victoria.,1642
4,WLB,State Library of Western Australia.,779
5,QBCL:BI,Brisbane City Council. Brisbane Images.,709
6,NSL,State Library of NSW.,643
7,QSL,State Library of Queensland.,630
8,SSL,State Library of South Australia.,375
9,QU,The University of Queensland. University of Qu...,268


## Filter by title keyword

In [96]:
title_kw = 'federation' # Edit to add your own keyword!
# Case-insensitive match anywhere in the title. Note that str.contains treats
# the keyword as a regular expression by default.
title_matches = df_new['title'].str.contains(title_kw, case=False, na=False)
filtered_title = df_new.loc[title_matches]
filtered_title

Unnamed: 0,creator,date,id,nuc,title,trove_url,institution
654,,1892-1909,10918152,ANL,Federation - Australia,https://trove.nla.gov.au/work/10918152,National Library of Australia.
752,,1947,159853683,ANL,Communist influence in Teachers Federation,https://trove.nla.gov.au/work/159853683,National Library of Australia.
1140,"Barton, Edmund Sir, 1849-1920",1897-1898,23733819,ANL,Letter and handbills relating to Australian fe...,https://trove.nla.gov.au/work/23733819,National Library of Australia.
1298,,1899,27029040,ANL,Synopsis of the Commonwealth Bill : approved b...,https://trove.nla.gov.au/work/27029040,National Library of Australia.
1312,,1890-1891,28833077,ANL,"The federation movement, and proceedings of th...",https://trove.nla.gov.au/work/28833077,National Library of Australia.
1313,,1895-1901,28833285,ANL,The Australian federation movement from a Sout...,https://trove.nla.gov.au/work/28833285,National Library of Australia.
1683,,1895-1901,5891330,ANL,The Australian federation movement from a Sout...,https://trove.nla.gov.au/work/5891330,National Library of Australia.
1696,"Henry, John, 1834-1912",1899,6050113,ANL,[Comments of the honourable John Henry dealing...,https://trove.nla.gov.au/work/6050113,National Library of Australia.
2059,,1900,191699908,WLB,Records of Australian Federation of Friends of...,https://trove.nla.gov.au/work/191699908,State Library of Western Australia.
4774,,1928,200344170,QSL,Waterside Workers Federation Cutting Book,https://trove.nla.gov.au/work/200344170,State Library of Queensland.


In [97]:
# Save filtered results as a CSV file
title_csv = 'filtered_title_{}.csv'.format(title_kw)
filtered_title.to_csv(title_csv, index=False)
# Make a download link. The file name is percent-encoded so that keywords
# containing spaces, quotes, '#', '?' or ':' can't break the href attribute
# or be misread by the browser as something other than a relative path.
display(HTML('<a target="_blank" href="{}">Download CSV file</a>'.format(quote(title_csv))))

## Filter by creator name

In [98]:
creator_kw = 'barton' # Edit to add your own name!
# Case-insensitive match anywhere in the creator field. Note that str.contains
# treats the name as a regular expression by default.
creator_matches = df_new['creator'].str.contains(creator_kw, case=False, na=False)
filtered_creator = df_new.loc[creator_matches]
filtered_creator

Unnamed: 0,creator,date,id,nuc,title,trove_url,institution
28,"Barton, Edmund, Sir, 1849-1920",1827-1940,27034442,ANL,Papers of Sir Edmund Barton,https://trove.nla.gov.au/work/27034442,National Library of Australia.
161,"Barton, Edmund, Sir, 1849-1920",1827-1940,14890081,ANL,Papers of Sir Edmund Barton,https://trove.nla.gov.au/work/14890081,National Library of Australia.
237,"Barton, Edmund Sir, 1849-1920",1902,21003348,ANL,Correspondence between Edmund Barton and Capta...,https://trove.nla.gov.au/work/21003348,National Library of Australia.
938,"Paterson, A. B. (Andrew Barton), 1864-1941",1905-1969,20843868,ANL,Material relating to Waltzing Matilda,https://trove.nla.gov.au/work/20843868,National Library of Australia.
1008,"Barton, Edmund Sir, 1849-1920",1887-1947,21730412,ANL,"Letters, papers, bill",https://trove.nla.gov.au/work/21730412,National Library of Australia.
1140,"Barton, Edmund Sir, 1849-1920",1897-1898,23733819,ANL,Letter and handbills relating to Australian fe...,https://trove.nla.gov.au/work/23733819,National Library of Australia.
1218,"Barton, Edmund Sir, 1849-1920",1901,24351962,ANL,Speech delivered by Barton at the opening of F...,https://trove.nla.gov.au/work/24351962,National Library of Australia.
1291,"Barton, G. B. (George Burnett), 1836-1901",1800-1899,24904338,ANL,Manuscripts,https://trove.nla.gov.au/work/24904338,National Library of Australia.
1396,"Paterson, A. B. (Andrew Barton), 1864-1941",1896,34380059,ANL,"Letter : Sydney to Thos. Whitby, Blackheath",https://trove.nla.gov.au/work/34380059,National Library of Australia.
1412,"Barton, Edmund Sir, 1849-1920",1901,34392819,ANL,Letters,https://trove.nla.gov.au/work/34392819,National Library of Australia.


In [100]:
# Save filtered results as a CSV file
creator_csv = 'filtered_creator_{}.csv'.format(creator_kw)
filtered_creator.to_csv(creator_csv, index=False)
# Make a download link. The file name is percent-encoded so that names
# containing spaces, quotes, '#', '?' or ':' can't break the href attribute
# or be misread by the browser as something other than a relative path.
display(HTML('<a target="_blank" href="{}">Download CSV file</a>'.format(quote(creator_csv))))

## Filter by institution (nuc)

In [92]:
nuc = 'ANL' # Edit to add your own nuc!
# Keep only the records whose NUC code matches exactly.
nuc_matches = df_new['nuc'].eq(nuc)
filtered_nuc = df_new.loc[nuc_matches]
filtered_nuc

Unnamed: 0,creator,date,id,nuc,title,trove_url,institution
0,"Kelly, F. S. (Frederick Septimus), 1881-1916",1893-1926,10201266,ANL,Music manuscripts,https://trove.nla.gov.au/work/10201266,National Library of Australia.
1,,1863-1925,10544890,ANL,Collection of promissory notes from remote are...,https://trove.nla.gov.au/work/10544890,National Library of Australia.
2,"Kruse, Johann Secundus, 1859-1927",1870-1927,14952244,ANL,Papers of Johann Kruse,https://trove.nla.gov.au/work/14952244,National Library of Australia.
3,"Freycinet, Rose Marie de, d. 1832",1802-1927,152218670,ANL,Documents relating to Louis and Rose de Freycinet,https://trove.nla.gov.au/work/152218670,National Library of Australia.
4,"Dickens, Charles, 1812-1870",1869,168122181,ANL,Cheque written by Charles Dickens,https://trove.nla.gov.au/work/168122181,National Library of Australia.
5,"Paton, John Gibson, 1824-1907",1858-1891,179335696,ANL,"Journal, research notes and correspondence fro...",https://trove.nla.gov.au/work/179335696,National Library of Australia.
6,"Paton, Maggie Whitecross, 1841?-1905",1870-1943,179335711,ANL,Journals and correspondence from the New Hebrides,https://trove.nla.gov.au/work/179335711,National Library of Australia.
7,"Frazer, Charles Edward, 1880–1913",1910-1913,193288592,ANL,"Early Commonwealth currency, 1910-1913",https://trove.nla.gov.au/work/193288592,National Library of Australia.
8,"Norcock, John Henry, 1809-1854",1835-1837,20308332,ANL,Diary of John Henry Norcock,https://trove.nla.gov.au/work/20308332,National Library of Australia.
9,"Nobbs, George Hunn, 1799-1884",1845-1886,20850242,ANL,Pitcairn and Norfolk Island correspondence,https://trove.nla.gov.au/work/20850242,National Library of Australia.


In [95]:
# Save filtered results as a CSV file
nuc_csv = 'filtered_nuc_{}.csv'.format(nuc)
filtered_nuc.to_csv(nuc_csv, index=False)
# Make a download link. The file name is percent-encoded because some NUC
# codes contain a colon (e.g. 'QBCL:BI'); left raw in a relative href, the
# text before the colon would be parsed as a URL scheme and the link would break.
display(HTML('<a target="_blank" href="{}">Download CSV file</a>'.format(quote(nuc_csv))))