In [2]:
from IPython.core.display import HTML

# Notebook-wide stylesheet: font for <div>, colours/padding for h1-h4, and an
# image border. Kept as a named string so the markup is separate from the call.
# NOTE(review): the `.info` rule below has an empty `background-color` value —
# confirm which colour was intended.
_notebook_css = """
<style>
div {font-family:Open Sans,sans-serif;}
h1 {color:white;background-color: #446a7f;padding:10px}
h2{color:black; background-color:#b3b6a0; padding:10px}
h3{color:#78BE21; background-color:#446a7f; padding:10px}
h4{color:white; background-color:#78BE21;padding:5px}
.info{background-color:}
img {border: 1px;border-color:black;}
</style>
"""

# Last expression of the cell, so the notebook renders the <style> element.
HTML(_notebook_css)

#  Collections Data to Linked Art - Cleveland Museum of Art

This Jupyter notebook transforms collection data from the Cleveland Museum of Art into a Linked Art representation in JSON-LD format.

## Collection Data

The input data file is available from the Cleveland Museum of Art's GitHub repository:
https://github.com/ClevelandMuseumArt/openaccess

## Transformation Process

1. Read CSV file
2. Convert CSV file to Python Dictionary
3. Create a field mapping between the CSV file fields and the Linked Art data model
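4. Create an object property dictionary
5. Convert each object property dictionary to a Linked Art description and write it to a JSON-LD file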

In [2]:
try:
    import IPython
except:
    %pip install IPython
    import IPython   
    
from IPython.display import display,IFrame,HTML, Javascript
 
try:
    import json
except:
    %pip install json
    import json 
  
try:
    import csv
except:
    %pip install csv
    import csv
    
import os

try:
    import cromulent 
except:
    %pip install cromulent
    import cromulent

from cromulent.model import factory
    
import lib.linkedart as la 

try:
    import pandas as pd
except:
    %pip install pandas
    import pandas as pd
    
    

### Parse Collection Data CSV File

In [3]:
# Path to the collection CSV, relative to the notebook's working directory.
# The same `file` variable is reused by the DictReader cell further down.
file = './data/cma/input/data.csv'
# Load the CSV into a DataFrame purely for a tabular preview; `mpg` is not
# referenced again in the visible cells.
# low_memory=False: per the pandas documentation this parses the file in one
# pass instead of in chunks, which avoids mixed-dtype inference on wide files.
mpg = pd.read_csv(file,low_memory=False)
# Last expression of the cell: renders the first rows of the DataFrame.
mpg.head()

Unnamed: 0,id,accession_number,share_license_status,tombstone,current_location,title,title_in_original_language,series,series_in_original_language,creation_date,...,digital_description,wall_description,external_resources,citations,catalogue_raisonne,url,image_web,image_print,image_full,updated_at
0,74539,2015.449,CC0,"A Miller's Carriage, c. 1895. Albert-Charles L...",,A Miller's Carriage,,,,c. 1895,...,,,"{'wikidata': [], 'internet_archive': ['https:/...",,,https://clevelandart.org/art/2015.449,https://openaccess-cdn.clevelandart.org/2015.4...,https://openaccess-cdn.clevelandart.org/2015.4...,https://openaccess-cdn.clevelandart.org/2015.4...,2021-06-29 06:35:50.572000
1,74540,2015.451,CC0,"Leda and the Swan. Adolphe Yvon (French, 1817-...",,Leda and the Swan,,,,,...,In the late 1520s Michelangelo made a painting...,,"{'wikidata': [], 'internet_archive': []}",,,https://clevelandart.org/art/2015.451,https://openaccess-cdn.clevelandart.org/2015.4...,https://openaccess-cdn.clevelandart.org/2015.4...,https://openaccess-cdn.clevelandart.org/2015.4...,2020-11-04 19:07:39.161000
2,74554,2015.447,CC0,"Un Borreau (An Executioner), c. 1848. Auguste ...",,Un Borreau (An Executioner),,,,c. 1848,...,,,"{'wikidata': [], 'internet_archive': []}",,,https://clevelandart.org/art/2015.447,https://openaccess-cdn.clevelandart.org/2015.4...,https://openaccess-cdn.clevelandart.org/2015.4...,https://openaccess-cdn.clevelandart.org/2015.4...,2021-03-27 12:12:37.752000
3,74570,2018.41,CC0,"Profile Portrait of a Man, 18th century. Attri...",,Profile Portrait of a Man,,,,18th century,...,This portrait bust is a counterproof: the reve...,,"{'wikidata': [], 'internet_archive': ['https:/...",,,https://clevelandart.org/art/2018.41,https://openaccess-cdn.clevelandart.org/2018.4...,https://openaccess-cdn.clevelandart.org/2018.4...,https://openaccess-cdn.clevelandart.org/2018.4...,2021-06-29 06:35:50.582000
4,74572,2018.42,CC0,"The Temptation of St. Anthony, 19th century. J...",,The Temptation of St. Anthony,,,,19th century,...,Boilly’s scene represents the Temptations of S...,,"{'wikidata': [], 'internet_archive': []}",,,https://clevelandart.org/art/2018.42,https://openaccess-cdn.clevelandart.org/2018.4...,https://openaccess-cdn.clevelandart.org/2018.4...,https://openaccess-cdn.clevelandart.org/2018.4...,2020-11-04 19:07:40.307000


### Read Collection Data CSV file into Python Dictionary

- Remove Byte Order Mark (BOM) from CSV file 
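- Use `csv.DictReader()` to create an object that operates like a regular reader but maps the information in each row to a dict whose keys are given by the optional `fieldnames` parameter. Because `fieldnames` is omitted here, the values in the first row of the CSV file (the header row) are used as the keys.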



In [4]:
# Remove a UTF-8 Byte Order Mark (BOM), if present: decoding with 'utf-8-sig'
# drops the BOM, and the text is then written back as plain UTF-8.
# See https://stackoverflow.com/questions/8898294/convert-utf-8-with-bom-to-utf-8-with-no-bom-in-python
# NOTE: this rewrites the input file in place. `with` blocks guarantee that each
# handle is closed (and the write flushed) before the file is opened again.
with open(file, mode='r', encoding='utf-8-sig') as bom_source:
    s = bom_source.read()
with open(file, mode='w', encoding='utf-8') as bom_free:
    bom_free.write(s)

# Read every row into a list of dicts keyed by the CSV header row.
# csv.DictReader is a one-shot iterator: previewing its first row would consume
# that row, so a later loop over the same reader would silently start at the
# second record (and find nothing at all on a re-run). Materialising a list lets
# every consumer start from the first record.
# newline='' is the csv module's documented way to open files for reading.
with open(file, mode='r', encoding='utf-8', newline='') as csv_file:
    allObjects = list(csv.DictReader(csv_file))

display(HTML("<H3>Example Record in Python Dictionary</H3>"))
# Show only the first record as an example; print nothing if the CSV has no rows.
if allObjects:
    print(json.dumps(allObjects[0],indent=2))

{
  "id": "74539",
  "accession_number": "2015.449",
  "share_license_status": "CC0",
  "tombstone": "A Miller's Carriage, c. 1895. Albert-Charles Lebourg (French, 1849-1928). Black and white chalk with stumping ; sheet: 33.2 x 49.7 cm (13 1/16 x 19 9/16 in.). The Cleveland Museum of Art, Bequest of Muriel Butkin 2015.449",
  "current_location": "",
  "title": "A Miller's Carriage",
  "title_in_original_language": "",
  "series": "",
  "series_in_original_language": "",
  "creation_date": "c. 1895",
  "creation_date_earliest": "1890",
  "creation_date_latest": "1900",
  "creators": "Albert-Charles Lebourg (French, 1849-1928), artist",
  "culture": "France, 19th-20th century",
  "technique": "Black and white chalk with stumping ",
  "support_materials": "gray laid paper",
  "department": "Drawings",
  "collection": "DR - French",
  "type": "Drawing",
  "measurements": "Sheet: 33.2 x 49.7 cm (13 1/16 x 19 9/16 in.)",
  "state_of_the_work": "",
  "edition_of_the_work": "",
  "creditline":

#### Further Reading 

- Python CSV https://docs.python.org/3/library/csv.html
- Byte Order Mark https://en.wikipedia.org/wiki/Byte_order_mark

### Create field mapping

In [5]:



# Field mapping used by createObjProp.
#   key   -> property name placed in the object property dictionary
#   value -> name of the CSV column supplying that property
# An empty string means no CSV column is mapped to the property.
mapp = {
    # --- identifiers and classification ---
    "id": "id",
    "accession_number": "accession_number",
    "accession_date": "",
    "classification": "type",

    # --- titles and notes ---
    "title": "title",
    "alt_title": "title_in_original_language",
    "notes": "tombstone",

    # --- creation ---
    "date_created": "creation_date",
    "date_created_earliest": "creation_date_earliest",
    "date_created_latest": "creation_date_latest",
    "created_period": "culture",
    "created_dynasty": "",
    "created_inscriptions": "inscriptions",
    "created_notes": "fun_fact",
    "creator": "creators",

    # --- physical characteristics ---
    # NOTE(review): "Medium" is capitalised, unlike the lower-case column names
    # in the sample record — confirm the CSV has such a column, otherwise this
    # entry never matches.
    "physical_medium": "Medium",
    "physical_style": "",
    "physical_technique": "technique",
    "physical_description": "",
    "physical_dimensions": "measurements",

    # --- provenance, ownership and location ---
    "created_provenance": "provenance",
    "credit_line": "creditline",
    "collection": "department",
    "current_status": "current_location",
    "current_location": "current_location",

    # --- web page ---
    "homepage": "url",
}



### Create object property dictionary

In [6]:
def createObjProp(obj,mapp):
    """Build the object property dictionary for one CSV record.

    Args:
        obj: mapping of CSV column name to cell value for a single record.
        mapp: mapping of property name to the CSV column name that supplies it.

    Returns:
        dict with one entry per property whose mapped column is a key of `obj`.
        The "creator" property is wrapped as {"name": value}; all other values
        are copied unchanged. A fixed "current_owner" entry is always added.
    """
    properties = {}

    # Outer loop follows the record's column order and the inner loop the
    # mapping order, so entries are inserted in that combined order.
    for column in list(obj.keys()):
        for target, source in mapp.items():
            if source != column:
                continue
            value = obj[column]
            properties[target] = {"name": value} if target == "creator" else value

    # The owner is the same for every record, so it is set here rather than
    # read from the CSV.
    owner = {
        "name": "Cleveland Museum of Art",
        "location": "Cleveland,Ohio",
        "type": "http://vocab.getty.edu/aat/300312281",
        "type_label": "",
    }
    properties["current_owner"] = owner
    return properties


#  baseURI for JSON-LD document
baseURI = "https://clevelandart.org/art/"

# Folder receiving one JSON-LD file per object. Created on demand so the cell
# also works on a fresh checkout where the folder does not exist yet.
output_dir = "./data/cma/output/json/all/"
os.makedirs(output_dir, exist_ok=True)

# Only the first five records are transformed.
for index,obj in enumerate(allObjects):
    if index >= 5:
        break
    objProp = createObjProp(obj,mapp)
    # NOTE: `id` shadows the Python builtin; the name is kept because the
    # visualisation cell further down reads it to locate the last file written.
    id = obj[mapp.get("id")]
    object_uri = baseURI + id

    # Build the Linked Art description for this record.
    objLA = la.createObjDesc(objProp,la.objTypes,object_uri)

    # write to file
    # `with` closes the handle even if serialisation raises, and the explicit
    # UTF-8 encoding keeps the output independent of the platform default.
    with open(output_dir + id + ".json", "wt", encoding="utf-8") as text_file:
        text_file.write(factory.toString(objLA, compact=False))

In [7]:
from IPython.display import display,HTML,Javascript
# Render the section heading plus an empty container element with id "example";
# the next cell passes the selector '#example' to the visualisation script.
HTML('<h1>Data Visualisation</h1><div id="example" style="width:3000px;height:100%"/>')

In [8]:
from IPython.core.display import Javascript

# JSON-LD file to visualise. `id` is the record id left over from the
# transformation loop, so that cell must have been run before this one.
json_path = "./data/cma/output/json/all/" + id + ".json"

# json.dumps renders the path as a correctly quoted and escaped JavaScript
# string literal, so an id containing a quote or backslash cannot break (or
# inject code into) the generated script.
code2 = (
    "var file = " + json.dumps(json_path) + ";"
    "var selector = '#example';"
    "visjsonld(file, selector); "
)

# Prepend the contents of visld.js (presumably where `visjsonld` is defined —
# confirm) so the call in `code2` runs in the same script.
# Explicit UTF-8 keeps the read independent of the platform default encoding.
with open('src/js/visld.js', 'r', encoding='utf-8') as _jscript:
    code = _jscript.read() + code2

# Last expression of the cell: hands the combined script to the front end.
Javascript(code)




<IPython.core.display.Javascript object>

In [9]:
from IPython.core.display import Javascript, HTML


def fn(fpath):
    """Display one HTML link per entry in the directory `fpath`.

    Args:
        fpath: directory path, used both to list the entries and as the prefix
            of each link's href. A trailing "/" is added if missing.

    Raises:
        OSError: if `fpath` cannot be listed (propagated from os.listdir).
    """
    from html import escape  # local import keeps this cell self-contained

    # The href is built by plain concatenation, so the prefix must end with "/".
    prefix = fpath if fpath.endswith("/") else fpath + "/"

    # os.listdir returns entries in arbitrary order; sorting keeps the list
    # stable between runs.
    for name in sorted(os.listdir(fpath)):
        # Escape both uses so characters such as & < > ' in a file name cannot
        # break the generated markup.
        href = escape(prefix + name, quote=True)
        display(HTML("<a target='_new' href='" + href + "'>" + escape(name) + "</a>"))


# Heading and short instruction shown above the generated links.
display(HTML("<h1>File list</h1><p>Click on a link to view created Linked Art JSON-LD file</p>"))
    
# List the folder that the transformation cell writes its JSON-LD files to.
# The trailing slash matters: fn() builds each href as fpath + file name.
fn("./data/cma/output/json/all/")


  