# SETL (Semantic ETL) description for importing a BibTeX file as RDF.
#
# This part declares the namespaces, the extracted input file, and the Python
# helper scripts that the JSLDT transform refers to by their role identifiers.
#
# NOTE(review): in the reviewed copy of this file every IRI written in angle
# brackets was absent (e.g. "@prefix rdf: ." and ":bibtex_input a ;"), which
# is not valid Turtle. The IRIs below were restored from the conventional
# namespace of each prefix. Please confirm the less common ones
# (":", "ov:", "pv:", "setl:", "whyis:") against the deployed vocabulary.
@prefix rdf:     <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs:    <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd:     <http://www.w3.org/2001/XMLSchema#> .
@prefix owl:     <http://www.w3.org/2002/07/owl#> .
@prefix skos:    <http://www.w3.org/2004/02/skos/core#> .
@prefix prov:    <http://www.w3.org/ns/prov#> .
@prefix sio:     <http://semanticscience.org/resource/> .
@prefix dcat:    <http://www.w3.org/ns/dcat#> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix void:    <http://rdfs.org/ns/void#> .
@prefix foaf:    <http://xmlns.com/foaf/0.1/> .
@prefix ov:      <http://open.vocab.org/terms/> .
@prefix setl:    <http://purl.org/twc/vocab/setl/> .
@prefix csvw:    <http://www.w3.org/ns/csvw#> .
@prefix pv:      <http://purl.org/net/provenance/ns#> .
# Presumably identical to the setl:hasTemplatePrefix value used by :BibTeXSETL.
@prefix :        <http://vocab.rpi.edu/whyis/setl/bibtex/> .
@prefix whyis:   <http://vocab.rpi.edu/whyis/> .

# The BibTeX file to extract. The media-type IRI is the same one that
# :create_article_uri looks up in the graph to find the collection.
:bibtex_input a <https://www.iana.org/assignments/media-types/application/x-bibtex>;
  prov:wasGeneratedBy [
    a setl:Extract;
    prov:used [ a <https://www.iana.org/assignments/media-types/application/x-bibtex> ]
  ].

# Parses the extracted file (bound to the name "bibtex_file") and exposes the
# entries as an enumerate() iterator of (index, entry) pairs.
:bibtex_entries a owl:Class, prov:SoftwareAgent, setl:PythonScript;
  rdfs:subClassOf prov:Activity;
  prov:qualifiedDerivation [
    a prov:Derivation;
    prov:entity :bibtex_input;
    prov:hadRole [ dcterms:identifier "bibtex_file" ]
  ];
  prov:value '''
import bibtexparser
bib_database = bibtexparser.load(bibtex_file)
result = enumerate(bib_database.entries)
'''.

# Builds the URI for one entry: a DOI URI when the entry has a 'doi' field,
# otherwise <collection>/<entry ID>. The collection URI and the derived base
# namespace are attached to the function as attributes so the template can
# read create_article_uri.base_uri and create_article_uri.collection.
:create_article_uri a owl:Class, prov:SoftwareAgent, setl:PythonScript;
  rdfs:subClassOf prov:Activity;
  prov:value '''
import rdflib
global rdflib
global base_uri
# The collection is the subject typed with the BibTeX media type.
collection = this.graph.value(predicate=rdflib.RDF.type, object=rdflib.URIRef("https://www.iana.org/assignments/media-types/application/x-bibtex"))
base_uri = rdflib.Namespace(collection+'/')
global doi
doi = rdflib.Namespace("http://dx.doi.org/")
def result(entry):
    if 'doi' in entry:
        return doi[entry['doi']]
    else:
        return base_uri[entry['ID']]
result.base_uri = base_uri
result.collection = collection
'''.

# Splits one author/editor name into (given name, family name).
#  - "Last, First"      -> ("First", "Last")
#  - "Last, Jr, First"  -> ("First", "Last Jr")   (previously raised ValueError)
#  - "First Middle Last" -> ("First", "Middle Last")
# Surrounding whitespace is stripped first so a leading blank or newline does
# not produce an empty given name.
:author_fn_ln a owl:Class, prov:SoftwareAgent, setl:PythonScript;
  rdfs:subClassOf prov:Activity;
  prov:value '''
import re
global re
def result(author):
    author = author.strip()
    if ',' in author:
        parts = re.split('\\\\s*,\\\\s*', author)
        # First part is the family name, last part the given name; any parts
        # in between (e.g. a "Jr" suffix) are kept with the family name.
        ln = ' '.join([parts[0]] + parts[1:-1])
        fn = parts[-1]
    else:
        names = re.split('\\\\s+', author)
        fn = names[0]
        ln = ' '.join(names[1:])
    return fn, ln
'''.

# Builds a date string from the entry's 'year' and optional 'month' fields.
# Returns "YYYY" or, when a month name/abbreviation is recognised, "YYYY-MM"
# with a zero-padded month (previously "YYYY-M", which is not a valid XSD
# lexical form).
:create_date a owl:Class, prov:SoftwareAgent, setl:PythonScript;
  rdfs:subClassOf prov:Activity;
  prov:value '''
import re
global re
def result(row):
    date = row['year']
    months = ['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec']
    if 'month' in row:
        for i, m in enumerate(months):
            # Substring match so both "mar" and "March" are recognised.
            if m in row['month'].lower():
                return date + '-' + str(i+1).zfill(2)
    return date
'''.
# Maps a BibTeX entry type to the RDF types assigned to the resulting
# resource. Compact IRIs (dcterms:, bibo:) are expanded by the JSON-LD context
# of the transform that uses this table.
# 'conference' is a standard BibTeX entry type equivalent to 'inproceedings'
# and was previously missing from the table.
:bib_resource_types a owl:Class, prov:SoftwareAgent, setl:PythonScript;
  rdfs:subClassOf prov:Activity;
  prov:value '''
result = {
    'article' : ["dcterms:BibliographicResource",'bibo:AcademicArticle'],
    'book' : ["dcterms:BibliographicResource",'bibo:Book'],
    'booklet' : ["dcterms:BibliographicResource",'http://purl.org/net/nknouf/ns/bibtex#Booklet'],
    'conference' : ["dcterms:BibliographicResource","http://vivoweb.org/ontology/core#ConferencePaper"],
    'inbook' : ["dcterms:BibliographicResource",'bibo:Chapter'],
    'incollection' : ["dcterms:BibliographicResource",'bibo:BookSection'],
    'inproceedings' : ["dcterms:BibliographicResource","http://vivoweb.org/ontology/core#ConferencePaper"],
    'manual' : ["dcterms:BibliographicResource","bibo:Manual"],
    'mastersthesis' : ["dcterms:BibliographicResource",'http://purl.org/net/nknouf/ns/bibtex#Mastersthesis', 'bibo:Thesis'],
    'misc' : ["dcterms:BibliographicResource"],
    'phdthesis' : ["dcterms:BibliographicResource",'http://purl.org/net/nknouf/ns/bibtex#Phdthesis', 'bibo:Thesis'],
    'proceedings' : ["dcterms:BibliographicResource",'bibo:Proceedings'],
    'techreport' : ["dcterms:BibliographicResource",'http://purl.org/spar/fabio/TechnicalReport','bibo:Report'],
    'unpublished' : ["dcterms:BibliographicResource",'bibo:Manuscript'],
}
'''.

# Class of SETL scripts for BibTeX input: any script that prov:used something
# typed with the BibTeX media type.
# NOTE(review): the object of owl:someValuesFrom was missing in the reviewed
# copy of this file (invalid Turtle). It was restored to the BibTeX media-type
# IRI that the :create_article_uri script looks up - please confirm.
:BibTeXSETL rdfs:subClassOf setl:SemanticETLScript;
  setl:hasTemplatePrefix "http://vocab.rpi.edu/whyis/setl/bibtex/";
  rdfs:subClassOf [
    a owl:Restriction;
    owl:onProperty prov:used;
    owl:someValuesFrom <https://www.iana.org/assignments/media-types/application/x-bibtex>
  ].
# The dataset produced from the BibTeX entries. It is generated by a JSLDT
# transform that iterates over :bibtex_entries (each entry is available as
# "row") and uses the helper scripts under the names given by their roles.
:bibtex_collection a void:Dataset, dcat:Dataset;
  prov:wasGeneratedBy [ a :BibTeXSETL ], [
    a setl:Transform, setl:JSLDT;
    prov:used :bibtex_entries;
    prov:qualifiedUsage [
      a prov:Usage;
      prov:entity :create_article_uri;
      prov:hadRole [ dcterms:identifier "create_article_uri" ]
    ];
    prov:qualifiedUsage [
      a prov:Usage;
      prov:entity :bib_resource_types;
      prov:hadRole [ dcterms:identifier "bib_resource_types" ]
    ];
    prov:qualifiedUsage [
      a prov:Usage;
      prov:entity :author_fn_ln;
      prov:hadRole [ dcterms:identifier "author_fn_ln" ]
    ];
    prov:qualifiedUsage [
      a prov:Usage;
      prov:entity :create_date;
      prov:hadRole [ dcterms:identifier "create_date" ]
    ];
    # JSON-LD context for the template. "xsd" is declared because the template
    # uses the compact IRI xsd:dateTime; without the prefix definition that
    # string would be taken as an absolute IRI rather than the XSD datatype.
    setl:hasContext '''{
  "@vocab" : "http://hadatac.org/ont/chear#",
  "dataset" : "https://hbgd.tw.rpi.edu/dataset/",
  "sio" : "http://semanticscience.org/resource/",
  "skos" : "http://www.w3.org/2004/02/skos/core#",
  "dcterms" : "http://purl.org/dc/terms/",
  "prov" : "http://www.w3.org/ns/prov#",
  "bibo" : "http://purl.org/ontology/bibo/",
  "rdfs" : "http://www.w3.org/2000/01/rdf-schema#",
  "foaf" : "http://xmlns.com/foaf/0.1/",
  "void" : "http://rdfs.org/ns/void#",
  "xsd" : "http://www.w3.org/2001/XMLSchema#"
}''';
    # Template notes:
    #  - Author/editor lists are split on "and" surrounded by whitespace
    #    (\\s+and\\s+). The previous \\s*and\\s* also split inside names such
    #    as "Anderson" or "Alexander".
    #  - author_fn_ln returns (given, family); the editor branch previously
    #    bound the result as "ln, fn", swapping the two names.
    #  - Entry types missing from bib_resource_types fall back to
    #    dcterms:BibliographicResource instead of raising KeyError.
    #  - skos:Organization is not a SKOS term; foaf:Organization is used.
    #  - NOTE(review): dcterms:date is typed xsd:dateTime, but create_date
    #    yields only a year or year-month string, which is not a valid
    #    xsd:dateTime lexical form. The datatype is left unchanged here;
    #    confirm whether xsd:gYear / xsd:gYearMonth is wanted.
    prov:value '''[{
  "@context" : {
    "authorList" : {"@id" : "bibo:authorList", "@type": "@list"}
  },
  "@id" : "{{create_article_uri(row)}}",
  "@type" : [
    {
      "@for" : "t in bib_resource_types.get(row['ENTRYTYPE'], ['dcterms:BibliographicResource'])",
      "@do" : "{{t}}"
    }
  ],
  "dcterms:creator" : [
    {
      "@if" : "'author' in row",
      "@for" : "author in re.split('\\\\s+and\\\\s+', row['author'])",
      "@do" : {
        "@with" : "author_fn_ln(author) as fn, ln",
        "@do" : {
          "@id" : "{{create_article_uri.base_uri}}author/{{slugify(' '.join([fn, ln]))}}",
          "@type" : "foaf:Person",
          "foaf:familyName" : "{{ln}}",
          "foaf:givenName" : "{{fn}}",
          "foaf:name" : "{{fn}} {{ln}}"
        }
      }
    }
  ],
  "authorList" : [
    {
      "@if" : "'author' in row",
      "@for" : "author in re.split('\\\\s+and\\\\s+', row['author'])",
      "@do" : {
        "@with" : "author_fn_ln(author) as fn, ln",
        "@do" : {
          "@id" : "{{create_article_uri.base_uri}}author/{{slugify(' '.join([fn, ln]))}}"
        }
      }
    }
  ],
  "dcterms:abstract" : [
    {
      "@if" : "'abstract' in row",
      "@value" : "{{row['abstract']}}"
    }
  ],
  "skos:editorialNote" : [
    {
      "@if" : "'annote' in row",
      "@value" : "{{row['annote']}}"
    },
    {
      "@if" : "'note' in row",
      "@value" : "{{row['note']}}"
    }
  ],
  "dcterms:title" : [
    {
      "@if" : "'title' in row",
      "@value" : "{{row['title']}}"
    }
  ],
  "dcterms:publisher" : [
    {
      "@if" : "'publisher' in row",
      "@id" : "{{create_article_uri.base_uri}}publisher/{{slugify(row['publisher'])}}",
      "rdfs:label" : "{{row['publisher']}}",
      "http://schemas.talis.com/2005/address/schema#localityName" : [
        {
          "@if" : "'address' in row",
          "@value" : "{{row['address']}}"
        }
      ]
    },
    {
      "@if" : "'institution' in row",
      "@id" : "{{create_article_uri.base_uri}}institution/{{slugify(row['institution'])}}",
      "rdfs:label" : "{{row['institution']}}",
      "http://schemas.talis.com/2005/address/schema#localityName" : [
        {
          "@if" : "'address' in row",
          "@value" : "{{row['address']}}"
        }
      ]
    },
    {
      "@if" : "'school' in row",
      "@id" : "{{create_article_uri.base_uri}}institution/{{slugify(row['school'])}}",
      "rdfs:label" : "{{row['school']}}",
      "http://schemas.talis.com/2005/address/schema#localityName" : [
        {
          "@if" : "'address' in row",
          "@value" : "{{row['address']}}"
        }
      ]
    }
  ],
  "dcterms:date" : [
    {
      "@if" : "'year' in row",
      "@value" : "{{create_date(row)}}",
      "@type" : "xsd:dateTime"
    }
  ],
  "http://prismstandard.org/namespaces/basic/2.1/doi" : [
    {
      "@if" : "'doi' in row",
      "@value" : "{{row['doi']}}"
    }
  ],
  "dcterms:identifier" : [
    {
      "@if" : "'doi' in row",
      "@value" : "{{row['doi']}}"
    }
  ],
  "http://prismstandard.org/namespaces/basic/2.1/startingPage" : [
    {
      "@if" : "'pages' in row",
      "@value" : "{{row['pages'].split('-')[0]}}"
    }
  ],
  "bibo:pageStart" : [
    {
      "@if" : "'pages' in row",
      "@value" : "{{row['pages'].split('-')[0]}}"
    }
  ],
  "http://prismstandard.org/namespaces/basic/2.1/endingPage" : [
    {
      "@if" : "'pages' in row",
      "@value" : "{{row['pages'].split('-')[-1]}}"
    }
  ],
  "bibo:pageEnd" : [
    {
      "@if" : "'pages' in row",
      "@value" : "{{row['pages'].split('-')[-1]}}"
    }
  ],
  "http://prismstandard.org/namespaces/basic/2.1/volume" : [
    {
      "@if" : "'volume' in row",
      "@value" : "{{row['volume']}}"
    }
  ],
  "bibo:chapter" : [
    {
      "@if" : "'chapter' in row",
      "@value" : "{{row['chapter']}}"
    }
  ],
  "@reverse" : {
    "dcterms:hasPart" : {
      "@id" : "{{create_article_uri.collection}}"
    }
  },
  "rdfs:seeAlso" : [
    {
      "@if" : "'url' in row",
      "@id" : "{{row['url']}}"
    }
  ],
  "http://www.w3.org/ns/dcat#downloadURL" : [
    {
      "@if" : "'url' in row",
      "@id" : "{{row['url']}}"
    }
  ],
  "dcterms:isPartOf" : [
    {
      "@id" : "{{create_article_uri.collection}}",
      "@type" : "http://purl.org/dc/dcmitype/Collection"
    },
    {
      "@if" : "'journal' in row",
      "@id" : "{{create_article_uri.base_uri}}journal/{{slugify(row['journal'])}}",
      "@type" : "bibo:Journal",
      "dcterms:title" : "{{row['journal']}}",
      "dcterms:publisher" : [
        {
          "@if" : "'publisher' in row",
          "@id" : "{{create_article_uri.base_uri}}publisher/{{slugify(row['publisher'])}}"
        }
      ]
    },
    {
      "@if" : "'booktitle' in row",
      "@id" : "{{create_article_uri.base_uri}}book/{{slugify(row['booktitle'])}}",
      "@type" : "bibo:Book",
      "dcterms:title" : "{{row['booktitle']}}",
      "bibo:edition" : [
        {
          "@if" : "'edition' in row",
          "@value" : "{{row['edition']}}"
        }
      ],
      "dcterms:isPartOf" : [
        {
          "@if" : "'series' in row",
          "@id" : "{{create_article_uri.base_uri}}series/{{slugify(row['series'])}}",
          "@type" : "bibo:Series",
          "dcterms:title" : "{{row['series']}}"
        }
      ],
      "dcterms:contributor" : [
        {
          "@if" : "'editor' in row",
          "@for" : "editor in re.split('\\\\s+and\\\\s+', row['editor'])",
          "@do" : {
            "@with" : "author_fn_ln(editor) as fn, ln",
            "@do" : {
              "@id" : "{{create_article_uri.base_uri}}author/{{slugify(' '.join([fn, ln]))}}",
              "@type" : "foaf:Person",
              "foaf:familyName" : "{{ln}}",
              "foaf:givenName" : "{{fn}}",
              "foaf:name" : "{{fn}} {{ln}}"
            }
          }
        }
      ],
      "prov:wasAttributedTo" : [
        {
          "@if" : "'organization' in row",
          "@id" : "{{create_article_uri.base_uri}}organization/{{slugify(row['organization'])}}",
          "rdfs:label" : "{{row['organization']}}",
          "@type" : ["foaf:Organization", "prov:Organization"]
        }
      ],
      "dcterms:publisher" : [
        {
          "@if" : "'publisher' in row",
          "@id" : "{{create_article_uri.base_uri}}publisher/{{slugify(row['publisher'])}}"
        }
      ]
    }
  ]
}]'''
  ].