In [1]:
try:
    import IPython
except:
    %pip install IPython
    import IPython 
from IPython.display import display, IFrame, HTML, Javascript
HTML("""<link rel="stylesheet" type="text/css" href="src/css/notebook.css"/>""")

#  Transforming Collections Data to Linked Art 
# National Gallery of Art

## Input Data

The collection data is stored in two files:
- CSV data file containing artwork descriptions: [data file](./data/nga/input/objects.csv)
- CSV data file containing detailed digital image information for artworks: https://raw.githubusercontent.com/NationalGalleryOfArt/opendata/main/data/published_images.csv


 #### Further Reading 
 
- National Gallery of Art https://www.nga.gov/
- NGA GitHub https://github.com/NationalGalleryOfArt
- The input data file is from https://github.com/NationalGalleryOfArt/opendata/tree/main/data

In [2]:
### Load NGA Collection Data into DataFrame

In [3]:
file = './data/nga/input/objects.csv'

try:
    import pandas as pd
except:
    !pip install pandas
    import pandas as pd
    
mpg = pd.read_csv(file,low_memory=False)
mpg.head()

Unnamed: 0,objectid,accessioned,accessionnum,locationid,title,displaydate,beginyear,endyear,visualbrowsertimespan,medium,...,visualbrowserclassification,parentid,isvirtual,departmentabbr,portfolio,series,volume,watermarks,lastdetectedmodification,customprinturl
0,113260,1,2000.127.20.1-193,,"Lithographs, Volume 9",,1804.0,1866.0,1801 to 1825,book of lithographs,...,volume,,0,CG-E,,,,,2020-05-06 22:01:32.06-04,
1,113833,1,2000.127.3.1-172,,"Lithographs, Volume 12",,1804.0,1866.0,1801 to 1825,book of lithographs,...,volume,,0,CG-E,,,,,2020-05-06 22:01:32.06-04,
2,114640,1,2000.127.8.1-110,,"Lithographs, Volume 17",,1804.0,1866.0,1801 to 1825,book of lithographs,...,volume,,0,CG-E,,,,,2020-05-06 22:01:32.06-04,
3,114855,1,2000.127.10.1-28,,"Lithographs, Volume 19",,1804.0,1866.0,1801 to 1825,book of lithographs,...,volume,,0,CG-E,,,,,2020-05-06 22:01:32.06-04,
4,119191,1,2001.100.2.b,,Studies of Lago Maggiore and and the Entrance ...,c. 1700,1700.0,1700.0,1651 to 1700,brown ink over graphite on laid paper,...,drawing,119190.0,0,CG-E,,,,,2019-10-28 22:01:34.883-04,


### Load NGA Digital Image File into DataFrame

The data file containing detailed digital image data is loaded into the pandas DataFrame `df_images`.

In [4]:
# Location of the published-images table; pandas downloads it directly from this URL.
file_images = "https://raw.githubusercontent.com/NationalGalleryOfArt/opendata/main/data/published_images.csv"
df_images = pd.read_csv(filepath_or_buffer=file_images)

# Show the first five rows as a quick check of the loaded columns.
df_images.head(n=5)

Unnamed: 0,uuid,iiifurl,iiifthumburl,viewtype,sequence,width,height,maxpixels,created,modified,depictstmsobjectid,assistivetext
0,00004dec-8300-4487-8d89-562d0126b6a1,https://api.nga.gov/iiif/00004dec-8300-4487-8d...,https://api.nga.gov/iiif/00004dec-8300-4487-8d...,primary,0.0,2623,4000,640.0,2010-09-07 15:08:48-04,2022-04-21 12:57:43.657-04,11975,
1,00007f61-4922-417b-8f27-893ea328206c,https://api.nga.gov/iiif/00007f61-4922-417b-8f...,https://api.nga.gov/iiif/00007f61-4922-417b-8f...,primary,0.0,3365,4332,,2013-07-05 15:41:08-04,2022-05-23 14:59:28-04,17387,
2,0000bd8c-39de-4453-b55d-5e28a9beed38,https://api.nga.gov/iiif/0000bd8c-39de-4453-b5...,https://api.nga.gov/iiif/0000bd8c-39de-4453-b5...,primary,0.0,3500,4688,,2013-08-05 14:31:59-04,2022-05-23 15:05:58-04,19245,
3,0000e5a4-7d32-4c2a-97c6-a6b571c9fd71,https://api.nga.gov/iiif/0000e5a4-7d32-4c2a-97...,https://api.nga.gov/iiif/0000e5a4-7d32-4c2a-97...,primary,0.0,2252,3000,,2013-03-18 14:39:55-04,2022-05-17 18:19:25-04,153987,
4,0001668a-dd1c-48e8-9267-b6d1697d43c8,https://api.nga.gov/iiif/0001668a-dd1c-48e8-92...,https://api.nga.gov/iiif/0001668a-dd1c-48e8-92...,primary,0.0,3446,4448,,2014-01-02 14:50:50-05,2022-05-23 15:39:38-04,23830,


### Remove Byte Order Marks and Define Data Mapping

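Remove the byte order mark (BOM) from the collection data file, read it with `csv.DictReader`, and print the first record as a preview. The Python dictionary containing the data mapping is defined in the transformation step below.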

In [5]:
import csv
import json  # standard library: always available, so no install fallback is needed


# Remove BOM: decoding with utf-8-sig drops a leading byte order mark if one
# is present; the text is then written back as plain UTF-8. Both handles are
# closed before the file is opened again below.
with open(file, mode='r', encoding='utf-8-sig') as bom_source:
    s = bom_source.read()
with open(file, mode='w', encoding='utf-8') as bom_free:
    bom_free.write(s)

# Preview the first record through a short-lived reader of its own, so that
# the preview does not use up the first row of `allObjects`.
# newline='' is what the csv module expects for files it parses.
with open(file, mode='r', encoding='utf-8', newline='') as preview_file:
    for obj in csv.DictReader(preview_file):
        print(json.dumps(obj, indent=2))
        break

# Lazy reader over every row of the file, one dict per row keyed by column name.
# It can be iterated only once; re-run this cell to obtain a fresh reader.
# The underlying file handle stays open for as long as rows are being read.
allObjects = csv.DictReader(open(file, mode='r', encoding='utf-8', newline=''))

{
  "objectid": "113260",
  "accessioned": "1",
  "accessionnum": "2000.127.20.1-193",
  "locationid": "",
  "title": "Lithographs, Volume 9",
  "displaydate": "",
  "beginyear": "1804",
  "endyear": "1866",
  "visualbrowsertimespan": "1801 to 1825",
  "medium": "book of lithographs",
  "dimensions": "",
  "inscription": "",
  "markings": "",
  "attributioninverted": "Gavarni, Paul",
  "attribution": "Paul Gavarni",
  "creditline": "Ailsa Mellon Bruce Fund",
  "classification": "Volume",
  "subclassification": "",
  "visualbrowserclassification": "volume",
  "parentid": "",
  "isvirtual": "0",
  "departmentabbr": "CG-E",
  "portfolio": "",
  "series": "",
  "volume": "",
  "watermarks": "",
  "lastdetectedmodification": "2020-05-06 22:01:32.06-04",
  "customprinturl": ""
}


### Transform to JSON-LD 

This next step uses the following to transform the collections data to Linked Art JSON-LD:
- the data mapping
- custom coding in `createObjProp()`
- the `cromulent` Python library
- custom coding in the `la` (`lib.linkedart`) module, including `createObjDesc()`

The URLs for the artwork digital images are in a separate file. Custom code that runs after `createObjProp()` matches the rows in the two collection data files on the object identifier to extract the digital image URL.

<pre>
    matchImages = df_images.query('depictstmsobjectid == ' + objProp["id"] )
    objProp["image_url"] = matchImages["iiifurl"].iloc[0]  + "/full/!500,500/0/default.jpg"
</pre>


Additional custom code creates a web page URL for the artwork:

<pre>
objProp["homepage"] = "https://www.nga.gov/collection/art-object-page." + id + ".html"   
</pre>

In [6]:
  
 # Data mapping: output property name -> CSV column that supplies it.
 # An empty string means the property has no source column in this data set.
 mapp = {
     "id": "objectid",
     "accession_number": "accessionnum",
     "accession_date": "",
     "classification": "classification",
     "title": "title",
     "alt_title": "",
     "notes": "",
     "date_created": "displaydate",
     "date_created_earliest": "beginyear",
     "date_created_latest": "endyear",
     "created_period": "",
     "created_dynasty": "",
     "created_inscriptions": "",
     "created_notes": "",
     "creator": "attribution",
     "physical_medium": "medium",
     "physical_style": "",
     "physical_technique": "",
     "physical_description": "",
     "physical_dimensions": "dimensions",
     "created_provenance": "",
     "credit_line": "creditline",
     "collection": "departmentabbr",
     "current_status": "",
     "current_owner": "",
     "image_url": "",
     "homepage": "",
 }

 # Show the mapping as a table: one row per property, with the mapped CSV column in column 0.
 display(pd.Series(mapp).to_frame())

Unnamed: 0,0
id,objectid
accession_number,accessionnum
accession_date,
classification,classification
title,title
alt_title,
notes,
date_created,displaydate
date_created_earliest,beginyear
date_created_latest,endyear


In [7]:
# Base URI for the JSON-LD documents; object and creator identifiers are built on top of it.
baseURI = "https://www.nga.gov/collection/"


def createObjProp(obj, mapp, baseURI):
    """Build the property dictionary for one artwork from one CSV row.

    Parameters
    ----------
    obj : dict
        One row of the collection data, keyed by CSV column name.
    mapp : dict
        Maps each output property name to the name of the CSV column that
        supplies it. A property mapped to an empty string has no source
        column and is left out of the result.
    baseURI : str
        Prefix used to build the creator identifier.

    Returns
    -------
    dict
        The mapped properties (in CSV column order), plus an empty
        "homepage" and a fixed "current_owner" entry. "creator" is a
        one-element list holding a dict with "id", "name" and "role".

    Raises
    ------
    KeyError
        If a creator column is present in `obj` but `mapp` has no "id"
        entry, or the column named by `mapp["id"]` is missing from `obj`.
    """
    # Invert the mapping once: CSV column -> properties it feeds. Unmapped
    # properties (empty column name) are skipped so that a CSV column with an
    # empty header can never be copied into them.
    props_by_column = {}
    for prop, column in mapp.items():
        if column:
            props_by_column.setdefault(column, []).append(prop)

    objProp = {}
    for column, value in obj.items():
        for prop in props_by_column.get(column, ()):
            if prop == "creator":
                # The creator identifier is derived from the object's own id.
                objProp[prop] = [{
                    "id": baseURI + "creatorid/" + obj[mapp["id"]],
                    "name": value,
                    "role": "Artist",
                }]
            else:
                objProp[prop] = value

    # NOTE(review): the notebook narrative describes building an
    # art-object-page URL here, but the code leaves the homepage blank —
    # confirm which is intended.
    objProp["homepage"] = ""
    objProp["current_owner"] = {"name": "National Gallery of Art",
                                "location": "Washington, D.C., United States",
                                "type": "http://vocab.getty.edu/aat/300312281",
                                "type_label": ""}
    return objProp

In [8]:
from lib import linkedart as la


try:
    import cromulent
except:
    !pip install cromulent
    import cromulent
from cromulent.model import factory


outputdir = "./data/nga/output/json/all/"

# list to hold file names for use with jsonld visualisation dropdown
selectOptions = []
selectOptions = [('Please select an artwork', '')]



dfimg_list = df_images['depictstmsobjectid'].tolist()
dfimgurl_list = df_images['iiifurl'].tolist()

counter = 1

for obj in allObjects:
    if counter > 100:
        break
    # create object property dictionary
    objProp = createObjProp(obj,mapp,baseURI)
    
    id = objProp["id"]
    object_uri = baseURI + id
    
    if int(id) in dfimg_list:
        df_images_match = df_images.loc[df_images['depictstmsobjectid'] == int(id)]
        objProp["image_url"] = df_images_match.iloc[0]["iiifurl"] + "/full/!500,500/0/default.jpg"
    
        filename = objProp["id"] + ".json"
        selectOptions.append( ( objProp["title"] + " (" + filename + ")" , filename))
        # create obj description
        objLA = la.createObjDesc(objProp,la.objTypes,object_uri)
    
    
        # write to file 
        text_file = open(outputdir + filename, "wt")
        n = text_file.write(factory.toString(objLA, compact=False))
   
        text_file.close()
        counter = counter + 1
    

### Explore the Linked Art JSON-LD files

Select an artwork from the dropdown to view 
- the artwork image
- a visualisation of the Linked Art JSON-LD representation created above

In [9]:
try:
    import ipywidgets
except:
    %pip install ipywidgets
    import ipywidgets

from ipywidgets import Layout, FileUpload 
from IPython.display import display, IFrame, HTML, Image
from IPython.core.display import Javascript        
 
import os

try:
    import json
except:
    %pip install json
    import json 
    
   
def dropdown_eventhandler(change):
    """Show the image and the JSON-LD visualisation for the selected artwork.

    Parameters
    ----------
    change
        Change notification passed by the widget's `observe` callback. Only
        `change.new` is read: the newly selected option value, which is the
        name of a JSON file inside `outputdir`.

    Returns
    -------
    None
        Output is produced through `display` only. Nothing happens when the
        selection is empty (the placeholder entry).
    """
    selected = change.new
    if not selected:
        # The placeholder option has an empty value; there is no file to load for it.
        return

    with open('./src/js/visld.js', 'r') as _jscript:
        vis_library = _jscript.read()

    # json.dumps yields a correctly quoted and escaped JavaScript string literal.
    code = (vis_library
            + "var file = " + json.dumps(outputdir + selected) + ";"
            + "var selector = '#visnga';visjsonld(file, selector); ")
    display(Javascript(code))

    with open(os.path.join(outputdir, selected)) as json_file:
        artwork = json.load(json_file)

    # Use the first representation as the image source; clear the image when there is none.
    if "representation" in artwork:
        image = artwork["representation"][0]["id"]
    else:
        image = ""
    display(Javascript("document.getElementById('artworknga').src = " + json.dumps(image) + ";"))
                    

# Dropdown listing the artworks collected in `selectOptions`.
selectObject = ipywidgets.Dropdown(
    options=selectOptions,
)

# Run the handler each time the dropdown's `value` changes.
selectObject.observe(dropdown_eventhandler, names='value')

display(selectObject)

Dropdown(options=(('Please select an artwork', ''), ('Studies of Lago Maggiore and and the Entrance to a Palaz…

<div><img style="height:500px" id="artworknga" src=""/></div>

<div id='visnga' style='height:100%;width:6000px'></div>