{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Biolink Metamodel Test Notebook" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "%pip install -q yamlmagic==0.2.0\n", "%reload_ext yamlmagic" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "from json import dumps, loads\n", "from types import ModuleType\n", "\n", "from IPython.display import HTML, display\n", "from jsonasobj import JsonObj, as_json_object\n", "from rdflib import Graph\n", "\n", "from biolinkml.meta import SchemaDefinition\n", "from biolinkml.utils.schemaloader import SchemaLoader\n", "from biolinkml.utils.yamlutils import DupCheckYamlLoader, as_json_object as yaml_to_json\n", "\n", "from biolinkml.generators.shexgen import ShExGenerator\n", "from biolinkml.generators.pythongen import PythonGenerator\n", "from biolinkml.generators.yumlgen import YumlGenerator\n", "from biolinkml.generators.jsonldcontextgen import ContextGenerator" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Basic model structure\n", "A biolink model consists of:\n", "* a name\n", "* a uri\n", "* type definitions\n", "* slot definitions\n", "* class definitions\n", "* subset definitions\n", "\n", "As an example, the model below defines three types (`string`, `int`, `boolean`), two classes (`person` and its subclass `friendly_person`) and the six slots that describe a person:" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "application/javascript": [ "\n", " require(\n", " [\n", " \"notebook/js/codecell\",\n", " \"codemirror/mode/yaml/yaml\"\n", " ],\n", " function(cc){\n", " cc.CodeCell.options_default.highlight_modes.magic_yaml = {\n", " reg: [\"^%%yaml\"]\n", " }\n", " }\n", " );\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%%yaml --loader 
DupCheckYamlLoader yaml\n", "id: http://example.org/sample/example1\n", "name: synopsis2\n", "prefixes:\n", " foaf: http://xmlns.com/foaf/0.1/\n", " samp: http://example.org/model/\n", " xsd: http://www.w3.org/2001/XMLSchema#\n", " \n", "default_prefix: samp\n", "\n", "default_curi_maps:\n", " - semweb_context\n", " \n", "default_range: string\n", "\n", "types:\n", " string:\n", " base: str\n", " uri: xsd:string\n", " int:\n", " base: int\n", " uri: xsd:integer\n", " boolean:\n", " base: Bool\n", " uri: xsd:boolean\n", " \n", "\n", "classes:\n", " person:\n", " description: A person, living or dead\n", " slots:\n", " - id\n", " - first name\n", " - last name\n", " - age\n", " - living\n", " - knows\n", " \n", " friendly_person:\n", " description: Any person that knows someone\n", " is_a: person\n", " slot_usage:\n", " knows:\n", " required: True\n", "\n", "slots:\n", " id:\n", " description: Unique identifier of a person\n", " identifier: true\n", "\n", " first name:\n", " description: The first name of a person\n", " slot_uri: foaf:firstName\n", " multivalued: true\n", " \n", " last name:\n", " description: The last name of a person\n", " slot_uri: foaf:lastName\n", " required: true\n", " \n", " living:\n", " description: Whether the person is alive\n", " range: boolean\n", " comments:\n", " - unspecified means unknown\n", " \n", " age:\n", " description: The age of a person if living or age of death if not\n", " range: int\n", " slot_uri: foaf:age\n", " \n", " knows:\n", " description: A person known by this person (indicating some level of reciprocated interaction between the parties).\n", " range: person\n", " slot_uri: foaf:knows\n", " multivalued: true" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### We can emit this model as a Python class" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# Auto generated from None by pythongen.py version: 0.2.0\n", "# 
Generation date: 2019-04-09 15:50\n", "# Schema: synopsis2\n", "#\n", "# id: http://example.org/sample/example1\n", "# description:\n", "# license:\n", "\n", "from typing import Optional, List, Union, Dict, ClassVar\n", "from dataclasses import dataclass\n", "from biolinkml.utils.metamodelcore import empty_list, empty_dict\n", "from biolinkml.utils.yamlutils import YAMLRoot\n", "from biolinkml.utils.metamodelcore import Bool\n", "\n", "metamodel_version = \"1.3.2\"\n", "\n", "# Types\n", "class String(str):\n", " pass\n", "\n", "\n", "class Int(int):\n", " pass\n", "\n", "\n", "class Boolean(Bool):\n", " pass\n", "\n", "\n", "# Class references\n", "class PersonId(str):\n", " pass\n", "\n", "\n", "class FriendlyPersonId(PersonId):\n", " pass\n", "\n", "\n", "@dataclass\n", "class Person(YAMLRoot):\n", " \"\"\"\n", " A person, living or dead\n", " \"\"\"\n", " _inherited_slots: ClassVar[List[str]] = []\n", "\n", " # === person ===\n", " id: Union[str, PersonId]\n", " last_name: str\n", " first_name: List[str] = empty_list()\n", " age: Optional[int] = None\n", " living: Optional[Bool] = None\n", " knows: List[Union[str, PersonId]] = empty_list()\n", "\n", " def _fix_elements(self):\n", " super()._fix_elements()\n", " if not isinstance(self.id, PersonId):\n", " self.id = PersonId(self.id)\n", " self.knows = [v if isinstance(v, PersonId)\n", " else PersonId(v) for v in self.knows]\n", "\n", "\n", "@dataclass\n", "class FriendlyPerson(Person):\n", " \"\"\"\n", " Any person that knows someone\n", " \"\"\"\n", " _inherited_slots: ClassVar[List[str]] = []\n", "\n", " # === person ===\n", " id: Union[str, FriendlyPersonId] = None\n", " last_name: str = None\n", " first_name: List[str] = empty_list()\n", " age: Optional[int] = None\n", " living: Optional[Bool] = None\n", "\n", " # === friendly_person ===\n", " knows: List[Union[str, PersonId]] = empty_list()\n", "\n", " def _fix_elements(self):\n", " super()._fix_elements()\n", " if self.id is not None and not 
isinstance(self.id, FriendlyPersonId):\n", " self.id = FriendlyPersonId(self.id)\n", " self.knows = [v if isinstance(v, PersonId)\n", " else PersonId(v) for v in self.knows]\n", "\n" ] } ], "source": [ "print(PythonGenerator(yaml).serialize())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Compile the python into a module" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "spec = compile(PythonGenerator(yaml).serialize(), 'test', 'exec')\n", "module = ModuleType('test')\n", "exec(spec, module.__dict__)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### We can emit a UML rendering of this model" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "[Person], [Person]^-[FriendlyPerson]\"/>" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# The yUML generator returns a diagram URL; embed it as an image\n", "display(HTML(f'<img src=\"{YumlGenerator(yaml).serialize()}\"/>'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### We can emit a JSON-LD context for the model:" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"_comments\": \"Auto generated from None by jsonldcontextgen.py version: 0.0.2\\nGeneration date: 2019-04-09 15:50\\nSchema: synopsis2\\n\\nid: http://example.org/sample/example1\\ndescription: \\nlicense: \\n\",\n", " \"@context\": {\n", " \"_comments\": null,\n", " \"type\": \"@type\",\n", " \"foaf\": \"http://xmlns.com/foaf/0.1/\",\n", " \"xsd\": \"http://www.w3.org/2001/XMLSchema#\",\n", " \"@vocab\": \"http://example.org/model/\",\n", " \"age\": {\n", " \"@type\": \"xsd:integer\",\n", " \"@id\": \"foaf:age\"\n", " },\n", " \"first_name\": {\n", " \"@id\": \"foaf:firstName\"\n", " },\n", " \"id\": \"@id\",\n", " \"knows\": {\n", " \"@type\": \"@id\",\n", " \"@id\": \"foaf:knows\"\n", " },\n", " \"last_name\": {\n", " \"@id\": \"foaf:lastName\"\n", " },\n", " \"living\": {\n", " 
\"@type\": \"xsd:boolean\"\n", " },\n", " \"@base\": \"http://example.org/people/\"\n", " }\n", "}\n" ] } ], "source": [ "cntxt = loads(ContextGenerator(yaml).serialize(base=\"http://example.org/people/\"))\n", "print(dumps(cntxt, indent=\" \"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The Python model can be used to create instances of the classes" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Person(id='42', last_name='smith', first_name=['Joe', 'Bob'], age=43, living=None, knows=[])\n" ] } ], "source": [ "# Generate a person\n", "joe_smith = module.Person(id=\"42\", last_name=\"smith\", first_name=['Joe', 'Bob'], age=43)\n", "print(joe_smith)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The instance can be combined with the JSON-LD context to generate RDF" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"id\": \"42\",\n", " \"last_name\": \"smith\",\n", " \"first_name\": [\n", " \"Joe\",\n", " \"Bob\"\n", " ],\n", " \"age\": 43,\n", " \"living\": null,\n", " \"knows\": [],\n", " \"type\": \"Person\",\n", " \"_comments\": \"Auto generated from None by jsonldcontextgen.py version: 0.0.2\\nGeneration date: 2019-04-09 15:50\\nSchema: synopsis2\\n\\nid: http://example.org/sample/example1\\ndescription: \\nlicense: \\n\",\n", " \"@context\": {\n", " \"_comments\": null,\n", " \"type\": \"@type\",\n", " \"foaf\": \"http://xmlns.com/foaf/0.1/\",\n", " \"xsd\": \"http://www.w3.org/2001/XMLSchema#\",\n", " \"@vocab\": \"http://example.org/model/\",\n", " \"age\": {\n", " \"@type\": \"xsd:integer\",\n", " \"@id\": \"foaf:age\"\n", " },\n", " \"first_name\": {\n", " \"@id\": \"foaf:firstName\"\n", " },\n", " \"id\": \"@id\",\n", " \"knows\": {\n", " \"@type\": \"@id\",\n", " \"@id\": \"foaf:knows\"\n", " },\n", " \"last_name\": {\n", " \"@id\": \"foaf:lastName\"\n", " 
},\n", " \"living\": {\n", " \"@type\": \"xsd:boolean\"\n", " },\n", " \"@base\": \"http://example.org/people/\"\n", " }\n", "}\n", "@prefix : .\n", "@prefix foaf: .\n", "@prefix rdf: .\n", "@prefix rdfs: .\n", "@prefix xml: .\n", "@prefix xsd: .\n", "\n", " a :Person ;\n", " foaf:age 43 ;\n", " foaf:firstName \"Bob\",\n", " \"Joe\" ;\n", " foaf:lastName \"smith\" .\n", "\n", "\n" ] } ], "source": [ "# Add the context and turn it into RDF\n", "jsonld = as_json_object(yaml_to_json(joe_smith, cntxt))\n", "print(jsonld)\n", "g = Graph()\n", "g.parse(data=jsonld, format=\"json-ld\")\n", "# rdflib < 6 returns bytes from serialize(); rdflib >= 6 returns str\n", "turtle = g.serialize(format=\"turtle\")\n", "print(turtle.decode() if isinstance(turtle, bytes) else turtle)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The model can be turned into ShEx" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BASE \n", "PREFIX rdf: \n", "PREFIX xsd: \n", "PREFIX foaf: \n", "\n", "\n", " xsd:string\n", "\n", " xsd:integer\n", "\n", " xsd:boolean\n", "\n", " EXTRA rdf:type CLOSED {\n", " ( $ ( @ + ;\n", " &\n", " ) ;\n", " rdf:type [ ]\n", " )\n", "}\n", "\n", " EXTRA rdf:type CLOSED {\n", " ( $ ( foaf:firstName @ * ;\n", " foaf:lastName @ ;\n", " foaf:age @ ? ;\n", " @ ? 
;\n", " foaf:knows @ *\n", " ) ;\n", " rdf:type [ ]\n", " )\n", "}\n", "\n", "\n", "\n" ] } ], "source": [ "shex = ShExGenerator(yaml).serialize(collections=False)\n", "print(shex)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The ShEx can then be used to validate RDF" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Conforms\n" ] } ], "source": [ "from pyshex.evaluate import evaluate\n", "\n", "# evaluate() yields a (conforms, reason) pair; reason carries the failure report\n", "conforms, reason = evaluate(g, shex,\n", "                            start=\"http://example.org/model/Person\",\n", "                            focus=\"http://example.org/people/42\")\n", "print(\"Conforms\" if conforms else reason)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The same node does not conform to `FriendlyPerson`\n", "The failure report below is expected: the node is typed as `Person`, not `FriendlyPerson`." ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Testing against shape http://example.org/model/FriendlyPerson\n", " Node: :Person not in value set:\n", "\t {\"values\": [\"http://example.org/model/FriendlyPerson\"], \"typ...\n", " Testing against shape http://example.org/model/FriendlyPerson\n", " Node: :Person not in value set:\n", "\t {\"values\": [\"http://example.org/model/FriendlyPerson\"], \"typ...\n", " Testing against shape http://example.org/model/FriendlyPerson\n", " No matching triples found for predicate rdf:type\n" ] } ], "source": [ "conforms, reason = evaluate(g, shex,\n", "                            start=\"http://example.org/model/FriendlyPerson\",\n", "                            focus=\"http://example.org/people/42\")\n", "print(\"Conforms\" if conforms else reason)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2" } }, "nbformat": 4, "nbformat_minor": 2 }