{ "cells": [ { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2019-09-23T18:50:19.036357Z", "start_time": "2019-09-23T18:50:19.031896Z" } }, "source": [ "# RDF Conversions\n", "\n", "This notebook demonstrates how to [convert](https://nexus-forge.readthedocs.io/en/latest/interaction.html#converting) data in any RDF format (n3, Turtle, ...) to Resources and vice-versa. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-09-23T18:50:20.068658Z", "start_time": "2019-09-23T18:50:19.054054Z" } }, "outputs": [], "source": [ "from kgforge.core import KnowledgeGraphForge" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A configuration file is needed in order to create a KnowledgeGraphForge session. A configuration can be generated using the notebook [00-Initialization.ipynb](00%20-%20Initialization.ipynb)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "forge = KnowledgeGraphForge(\"../../configurations/forge.yml\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Imports" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from kgforge.core import Resource" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Resource to rdflib.Graph" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "jane = Resource(type=\"Person\", name=\"Jane Doe\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " _register_one\n", " True\n" ] } ], "source": [ "forge.register(jane)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "graph = forge.as_graph(jane)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 7, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "len(graph) == 2" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('http://schema.org/name'), rdflib.term.Literal('Jane Doe'))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://schema.org/Person'))\n" ] } ], "source": [ "for triple in graph:\n", " print(triple)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "14" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "graph_store_metadata = forge.as_graph(jane, store_metadata=True)\n", "len(graph_store_metadata)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('http://schema.org/name'), rdflib.term.Literal('Jane Doe'))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/updatedAt'), rdflib.term.Literal('2022-04-12T22:47:28.984000+00:00', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#dateTime')))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/rev'), rdflib.term.Literal('1', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')))\n", 
"(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/createdAt'), rdflib.term.Literal('2022-04-12T22:47:28.984000+00:00', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#dateTime')))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/self'), rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/constrainedBy'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/schemas/unconstrained.json'))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/schemaProject'), rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/projects/dke/kgforge'))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/createdBy'), rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/realms/bbp/users/sy'))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/deprecated'), rdflib.term.Literal('false', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#boolean')))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/updatedBy'), 
rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/realms/bbp/users/sy'))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/outgoing'), rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57/outgoing'))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://schema.org/Person'))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/project'), rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/projects/dke/kgforge'))\n", "(rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57'), rdflib.term.URIRef('https://bluebrain.github.io/nexus/vocabulary/incoming'), rdflib.term.URIRef('https://bbp.epfl.ch/nexus/v1/resources/dke/kgforge/_/22ec9959-ab76-419a-9864-131b0dc12d57/incoming'))\n" ] } ], "source": [ "for triple in graph_store_metadata:\n", " print(triple)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## rdflib.Graph to Resource" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "#data in n3 format. 
Other RDF formats can be used\n", "data = '''\n", "@prefix schema: <http://schema.org/> .\n", "[] a schema:Person;\n", " schema:name \"Jane Doe\" ;\n", " schema:knows <http://example.org/JohnDoe> ;\n", " schema:affiliation <https://www.grid.ac/institutes/grid.5333.6> .\n", "\n", "<http://example.org/JohnDoe> a schema:Person;\n", " schema:name \"John Doe\" .\n", " \n", "<https://www.grid.ac/institutes/grid.5333.6> a schema:Organization;\n", " schema:name \"EPFL\" .\n", "'''" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ ")>" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import rdflib\n", "graph = rdflib.Graph()\n", "graph.parse(data=data, format=\"n3\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(graph)==8" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "resources = forge.from_graph(graph)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(resources) == 3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Apply a JSON-LD Frame" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "frame = {\n", " \"@type\": [ 'http://schema.org/Person'],\n", " \"@embed\": True\n", "} \n", "resources = forge.from_graph(data=graph, frame= frame)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(resources) == 2" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'@context': {'brick': 'https://brickschema.org/schema/Brick#',\n", " 'csvw': 'http://www.w3.org/ns/csvw#',\n", " 'dc': 
'http://purl.org/dc/elements/1.1/',\n", " 'dcam': 'http://purl.org/dc/dcam/',\n", " 'dcat': 'http://www.w3.org/ns/dcat#',\n", " 'dcmitype': 'http://purl.org/dc/dcmitype/',\n", " 'dcterms': 'http://purl.org/dc/terms/',\n", " 'doap': 'http://usefulinc.com/ns/doap#',\n", " 'foaf': 'http://xmlns.com/foaf/0.1/',\n", " 'odrl': 'http://www.w3.org/ns/odrl/2/',\n", " 'org': 'http://www.w3.org/ns/org#',\n", " 'owl': 'http://www.w3.org/2002/07/owl#',\n", " 'prof': 'http://www.w3.org/ns/dx/prof/',\n", " 'prov': 'http://www.w3.org/ns/prov#',\n", " 'qb': 'http://purl.org/linked-data/cube#',\n", " 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',\n", " 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',\n", " 'schema': 'https://schema.org/',\n", " 'schema1': 'http://schema.org/',\n", " 'sh': 'http://www.w3.org/ns/shacl#',\n", " 'skos': 'http://www.w3.org/2004/02/skos/core#',\n", " 'sosa': 'http://www.w3.org/ns/sosa/',\n", " 'ssn': 'http://www.w3.org/ns/ssn/',\n", " 'time': 'http://www.w3.org/2006/time#',\n", " 'vann': 'http://purl.org/vocab/vann/',\n", " 'void': 'http://rdfs.org/ns/void#',\n", " 'xsd': 'http://www.w3.org/2001/XMLSchema#'},\n", " '@type': 'schema1:Person',\n", " 'schema1:affiliation': {'@id': 'https://www.grid.ac/institutes/grid.5333.6',\n", " '@type': 'schema1:Organization',\n", " 'schema1:name': 'EPFL'},\n", " 'schema1:knows': {'@id': 'http://example.org/JohnDoe',\n", " '@type': 'schema1:Person',\n", " 'schema1:name': 'John Doe'},\n", " 'schema1:name': 'Jane Doe'},\n", " {'@context': {'brick': 'https://brickschema.org/schema/Brick#',\n", " 'csvw': 'http://www.w3.org/ns/csvw#',\n", " 'dc': 'http://purl.org/dc/elements/1.1/',\n", " 'dcam': 'http://purl.org/dc/dcam/',\n", " 'dcat': 'http://www.w3.org/ns/dcat#',\n", " 'dcmitype': 'http://purl.org/dc/dcmitype/',\n", " 'dcterms': 'http://purl.org/dc/terms/',\n", " 'doap': 'http://usefulinc.com/ns/doap#',\n", " 'foaf': 'http://xmlns.com/foaf/0.1/',\n", " 'odrl': 'http://www.w3.org/ns/odrl/2/',\n", " 'org': 
'http://www.w3.org/ns/org#',\n", " 'owl': 'http://www.w3.org/2002/07/owl#',\n", " 'prof': 'http://www.w3.org/ns/dx/prof/',\n", " 'prov': 'http://www.w3.org/ns/prov#',\n", " 'qb': 'http://purl.org/linked-data/cube#',\n", " 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',\n", " 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',\n", " 'schema': 'https://schema.org/',\n", " 'schema1': 'http://schema.org/',\n", " 'sh': 'http://www.w3.org/ns/shacl#',\n", " 'skos': 'http://www.w3.org/2004/02/skos/core#',\n", " 'sosa': 'http://www.w3.org/ns/sosa/',\n", " 'ssn': 'http://www.w3.org/ns/ssn/',\n", " 'time': 'http://www.w3.org/2006/time#',\n", " 'vann': 'http://purl.org/vocab/vann/',\n", " 'void': 'http://rdfs.org/ns/void#',\n", " 'xsd': 'http://www.w3.org/2001/XMLSchema#'},\n", " '@id': 'http://example.org/JohnDoe',\n", " '@type': 'schema1:Person',\n", " 'schema1:name': 'John Doe'}]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "forge.as_jsonld(resources)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Select a Type" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "_type= [\"http://schema.org/Organization\"]\n", "resources = forge.from_graph(data=graph, type= _type,use_model_context=True)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "isinstance(resources, Resource)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(resources)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "isinstance(resources, Resource)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], 
"source": [ "epfl_json = {'id': 'https://www.grid.ac/institutes/grid.5333.6',\n", " 'type': 'Organization',\n", " 'name': 'EPFL'}" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "epfl_json == forge.as_json(resources)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Use the forge Model context" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "_type= [\"http://schema.org/Organization\"]\n", "resources = forge.from_graph(data=graph, type= _type, use_model_context=True)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "isinstance(resources, Resource)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.7 (nexusforgelatest)", "language": "python", "name": "nexusforgelatest" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 4 }