{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Biolink Metamodel Test Notebook" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:10.387720Z", "iopub.status.busy": "2021-01-05T21:24:10.387211Z", "iopub.status.idle": "2021-01-05T21:24:11.039943Z", "shell.execute_reply": "2021-01-05T21:24:11.039542Z" } }, "outputs": [], "source": [ "%pip install -q --disable-pip-version-check yamlmagic\n", "%reload_ext yamlmagic" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:11.044596Z", "iopub.status.busy": "2021-01-05T21:24:11.044108Z", "iopub.status.idle": "2021-01-05T21:24:11.506777Z", "shell.execute_reply": "2021-01-05T21:24:11.507076Z" } }, "outputs": [], "source": [ "from IPython.display import display, HTML\n", "from types import ModuleType\n", "from json import loads, dumps\n", "from jsonasobj import JsonObj, as_json\n", "from rdflib import Graph\n", "\n", "from biolinkml.meta import SchemaDefinition\n", "from biolinkml.utils.schemaloader import SchemaLoader\n", "from biolinkml.utils.yamlutils import DupCheckYamlLoader, as_json_object as yaml_to_json\n", "\n", "from biolinkml.generators.shexgen import ShExGenerator\n", "from biolinkml.generators.pythongen import PythonGenerator\n", "from biolinkml.generators.yumlgen import YumlGenerator\n", "from biolinkml.generators.jsonldcontextgen import ContextGenerator" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Basic model structure\n", "A biolink model consists of:\n", "* a name\n", "* a uri\n", "* type definitions\n", "* slot definitions\n", "* class definitions\n", "* subset definitions\n", "\n", "As an example, the model below defines:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:11.513535Z", "iopub.status.busy": "2021-01-05T21:24:11.513099Z", "iopub.status.idle": 
"2021-01-05T21:24:11.520166Z", "shell.execute_reply": "2021-01-05T21:24:11.520491Z" } }, "outputs": [ { "data": { "application/javascript": [ "\n", " require(\n", " [\n", " \"notebook/js/codecell\",\n", " \"codemirror/mode/yaml/yaml\"\n", " ],\n", " function(cc){\n", " cc.CodeCell.options_default.highlight_modes.magic_yaml = {\n", " reg: [\"^%%yaml\"]\n", " }\n", " }\n", " );\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%%yaml --loader DupCheckYamlLoader yaml\n", "id: http://example.org/sample/example1\n", "name: synopsis2\n", "prefixes:\n", " foaf: http://xmlns.com/foaf/0.1/\n", " samp: http://example.org/model/\n", " xsd: http://www.w3.org/2001/XMLSchema#\n", "\n", "default_prefix: samp\n", "\n", "default_curi_maps:\n", " - semweb_context\n", "\n", "default_range: string\n", "\n", "types:\n", " string:\n", " base: str\n", " uri: xsd:string\n", " int:\n", " base: int\n", " uri: xsd:integer\n", " boolean:\n", " base: Bool\n", " uri: xsd:boolean\n", "\n", "\n", "classes:\n", " person:\n", " description: A person, living or dead\n", " slots:\n", " - id\n", " - first name\n", " - last name\n", " - age\n", " - living\n", " - knows\n", "\n", " friendly_person:\n", " description: Any person that knows someone\n", " is_a: person\n", " slot_usage:\n", " knows:\n", " required: True\n", "\n", "slots:\n", " id:\n", " description: Unique identifier of a person\n", " identifier: true\n", "\n", " first name:\n", " description: The first name of a person\n", " slot_uri: foaf:firstName\n", " multivalued: true\n", "\n", " last name:\n", " description: The last name of a person\n", " slot_uri: foaf:lastName\n", " required: true\n", "\n", " living:\n", " description: Whether the person is alive\n", " range: boolean\n", " comments:\n", " - unspecified means unknown\n", "\n", " age:\n", " description: The age of a person if living or age of death if not\n", " range: int\n", " slot_uri: foaf:age\n", "\n", " knows:\n", " description: 
A person known by this person (indicating some level of reciprocated interaction between the parties).\n", " range: person\n", " slot_uri: foaf:knows\n", " multivalued: true" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### We can emit this model as a Python class" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:11.525458Z", "iopub.status.busy": "2021-01-05T21:24:11.524982Z", "iopub.status.idle": "2021-01-05T21:24:11.626575Z", "shell.execute_reply": "2021-01-05T21:24:11.626977Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "# Auto generated from None by pythongen.py version: 0.9.0\n", "# Generation date: 2021-01-05 15:24\n", "# Schema: synopsis2\n", "#\n", "# id: http://example.org/sample/example1\n", "# description:\n", "# license:\n", "\n", "import dataclasses\n", "import sys\n", "import re\n", "from typing import Optional, List, Union, Dict, ClassVar, Any\n", "from dataclasses import dataclass\n", "from biolinkml.meta import EnumDefinition, PermissibleValue, PvFormulaOptions\n", "\n", "from biolinkml.utils.slot import Slot\n", "from biolinkml.utils.metamodelcore import empty_list, empty_dict, bnode\n", "from biolinkml.utils.yamlutils import YAMLRoot, extended_str, extended_float, extended_int\n", "if sys.version_info < (3, 7, 6):\n", " from biolinkml.utils.dataclass_extensions_375 import dataclasses_init_fn_with_kwargs\n", "else:\n", " from biolinkml.utils.dataclass_extensions_376 import dataclasses_init_fn_with_kwargs\n", "from biolinkml.utils.formatutils import camelcase, underscore, sfx\n", "from biolinkml.utils.enumerations import EnumDefinitionImpl\n", "from rdflib import Namespace, URIRef\n", "from biolinkml.utils.curienamespace import CurieNamespace\n", "from biolinkml.utils.metamodelcore import Bool\n", "\n", "metamodel_version = \"1.7.0\"\n", "\n", "# Overwrite dataclasses _init_fn to add **kwargs in __init__\n", "dataclasses._init_fn = 
dataclasses_init_fn_with_kwargs\n", "\n", "# Namespaces\n", "FOAF = CurieNamespace('foaf', 'http://xmlns.com/foaf/0.1/')\n", "SAMP = CurieNamespace('samp', 'http://example.org/model/')\n", "XSD = CurieNamespace('xsd', 'http://www.w3.org/2001/XMLSchema#')\n", "DEFAULT_ = SAMP\n", "\n", "\n", "# Types\n", "class String(str):\n", " type_class_uri = XSD.string\n", " type_class_curie = \"xsd:string\"\n", " type_name = \"string\"\n", " type_model_uri = SAMP.String\n", "\n", "\n", "class Int(int):\n", " type_class_uri = XSD.integer\n", " type_class_curie = \"xsd:integer\"\n", " type_name = \"int\"\n", " type_model_uri = SAMP.Int\n", "\n", "\n", "class Boolean(Bool):\n", " type_class_uri = XSD.boolean\n", " type_class_curie = \"xsd:boolean\"\n", " type_name = \"boolean\"\n", " type_model_uri = SAMP.Boolean\n", "\n", "\n", "# Class references\n", "class PersonId(extended_str):\n", " pass\n", "\n", "\n", "class FriendlyPersonId(PersonId):\n", " pass\n", "\n", "\n", "@dataclass\n", "class Person(YAMLRoot):\n", " \"\"\"\n", " A person, living or dead\n", " \"\"\"\n", " _inherited_slots: ClassVar[List[str]] = []\n", "\n", " class_class_uri: ClassVar[URIRef] = SAMP.Person\n", " class_class_curie: ClassVar[str] = \"samp:Person\"\n", " class_name: ClassVar[str] = \"person\"\n", " class_model_uri: ClassVar[URIRef] = SAMP.Person\n", "\n", " id: Union[str, PersonId] = None\n", " last_name: str = None\n", " first_name: Optional[Union[str, List[str]]] = empty_list()\n", " age: Optional[int] = None\n", " living: Optional[Bool] = None\n", " knows: Optional[Union[Union[str, PersonId], List[Union[str, PersonId]]]] = empty_list()\n", "\n", " def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):\n", " if self.id is None:\n", " raise ValueError(\"id must be supplied\")\n", " if not isinstance(self.id, PersonId):\n", " self.id = PersonId(self.id)\n", "\n", " if self.last_name is None:\n", " raise ValueError(\"last_name must be supplied\")\n", " if not isinstance(self.last_name, 
str):\n", " self.last_name = str(self.last_name)\n", "\n", " if self.first_name is None:\n", " self.first_name = []\n", " if not isinstance(self.first_name, list):\n", " self.first_name = [self.first_name]\n", " self.first_name = [v if isinstance(v, str) else str(v) for v in self.first_name]\n", "\n", " if self.age is not None and not isinstance(self.age, int):\n", " self.age = int(self.age)\n", "\n", " if self.living is not None and not isinstance(self.living, Bool):\n", " self.living = Bool(self.living)\n", "\n", " if self.knows is None:\n", " self.knows = []\n", " if not isinstance(self.knows, list):\n", " self.knows = [self.knows]\n", " self.knows = [v if isinstance(v, PersonId) else PersonId(v) for v in self.knows]\n", "\n", " super().__post_init__(**kwargs)\n", "\n", "\n", "@dataclass\n", "class FriendlyPerson(Person):\n", " \"\"\"\n", " Any person that knows someone\n", " \"\"\"\n", " _inherited_slots: ClassVar[List[str]] = []\n", "\n", " class_class_uri: ClassVar[URIRef] = SAMP.FriendlyPerson\n", " class_class_curie: ClassVar[str] = \"samp:FriendlyPerson\"\n", " class_name: ClassVar[str] = \"friendly_person\"\n", " class_model_uri: ClassVar[URIRef] = SAMP.FriendlyPerson\n", "\n", " id: Union[str, FriendlyPersonId] = None\n", " last_name: str = None\n", " knows: Union[Union[str, PersonId], List[Union[str, PersonId]]] = None\n", "\n", " def __post_init__(self, *_: List[str], **kwargs: Dict[str, Any]):\n", " if self.id is None:\n", " raise ValueError(\"id must be supplied\")\n", " if not isinstance(self.id, FriendlyPersonId):\n", " self.id = FriendlyPersonId(self.id)\n", "\n", " if self.knows is None:\n", " raise ValueError(\"knows must be supplied\")\n", " elif not isinstance(self.knows, list):\n", " self.knows = [self.knows]\n", " elif len(self.knows) == 0:\n", " raise ValueError(f\"knows must be a non-empty list\")\n", " self.knows = [v if isinstance(v, PersonId) else PersonId(v) for v in self.knows]\n", "\n", " super().__post_init__(**kwargs)\n", "\n", 
"\n", "# Enumerations\n", "\n", "\n", "# Slots\n", "class slots:\n", " pass\n", "\n", "slots.id = Slot(uri=SAMP.id, name=\"id\", curie=SAMP.curie('id'),\n", " model_uri=SAMP.id, domain=None, range=URIRef)\n", "\n", "slots.first_name = Slot(uri=FOAF.firstName, name=\"first name\", curie=FOAF.curie('firstName'),\n", " model_uri=SAMP.first_name, domain=None, range=Optional[Union[str, List[str]]])\n", "\n", "slots.last_name = Slot(uri=FOAF.lastName, name=\"last name\", curie=FOAF.curie('lastName'),\n", " model_uri=SAMP.last_name, domain=None, range=str)\n", "\n", "slots.living = Slot(uri=SAMP.living, name=\"living\", curie=SAMP.curie('living'),\n", " model_uri=SAMP.living, domain=None, range=Optional[Bool])\n", "\n", "slots.age = Slot(uri=FOAF.age, name=\"age\", curie=FOAF.curie('age'),\n", " model_uri=SAMP.age, domain=None, range=Optional[int])\n", "\n", "slots.knows = Slot(uri=FOAF.knows, name=\"knows\", curie=FOAF.curie('knows'),\n", " model_uri=SAMP.knows, domain=None, range=Optional[Union[Union[str, PersonId], List[Union[str, PersonId]]]])\n", "\n", "slots.friendly_person_knows = Slot(uri=SAMP.knows, name=\"friendly_person_knows\", curie=SAMP.curie('knows'),\n", " model_uri=SAMP.friendly_person_knows, domain=FriendlyPerson, range=Union[Union[str, PersonId], List[Union[str, PersonId]]])\n" ] } ], "source": [ "print(PythonGenerator(yaml).serialize())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Compile the python into a module" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:11.633547Z", "iopub.status.busy": "2021-01-05T21:24:11.633049Z", "iopub.status.idle": "2021-01-05T21:24:11.728211Z", "shell.execute_reply": "2021-01-05T21:24:11.727822Z" } }, "outputs": [], "source": [ "spec = compile(PythonGenerator(yaml).serialize(), 'test', 'exec')\n", "module = ModuleType('test')\n", "exec(spec, module.__dict__)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### We can 
emit a UML rendering of this model" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:11.733482Z", "iopub.status.busy": "2021-01-05T21:24:11.732985Z", "iopub.status.idle": "2021-01-05T21:24:11.827656Z", "shell.execute_reply": "2021-01-05T21:24:11.828367Z" } }, "outputs": [ { "data": { "text/html": [ "[Person],[Person]^-[FriendlyPerson],[FriendlyPerson]\"/>" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(HTML(f'<img src=\"{YumlGenerator(yaml).serialize()}\"/>'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### We can emit a JSON-LD context for the model:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:11.835391Z", "iopub.status.busy": "2021-01-05T21:24:11.834817Z", "iopub.status.idle": "2021-01-05T21:24:11.928102Z", "shell.execute_reply": "2021-01-05T21:24:11.928573Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"_comments\": \"Auto generated from None by jsonldcontextgen.py version: 0.1.1\\nGeneration date: 2021-01-05 15:24\\nSchema: synopsis2\\n\\nid: http://example.org/sample/example1\\ndescription: \\nlicense: \\n\",\n", " \"@context\": {\n", " \"type\": \"@type\",\n", " \"foaf\": \"http://xmlns.com/foaf/0.1/\",\n", " \"samp\": \"http://example.org/model/\",\n", " \"xsd\": \"http://www.w3.org/2001/XMLSchema#\",\n", " \"@vocab\": \"http://example.org/model/\",\n", " \"age\": {\n", " \"@type\": \"xsd:integer\",\n", " \"@id\": \"foaf:age\"\n", " },\n", " \"first_name\": {\n", " \"@id\": \"foaf:firstName\"\n", " },\n", " \"id\": \"@id\",\n", " \"knows\": {\n", " \"@type\": \"@id\",\n", " \"@id\": \"foaf:knows\"\n", " },\n", " \"last_name\": {\n", " \"@id\": \"foaf:lastName\"\n", " },\n", " \"living\": {\n", " \"@type\": \"xsd:boolean\"\n", " },\n", " \"@base\": \"http://example.org/people/\"\n", " }\n", "}\n" ] } ], "source": [ "cntxt = 
loads(ContextGenerator(yaml).serialize(base=\"http://example.org/people/\"))\n", "print(dumps(cntxt, indent=\" \"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The Python model can be used to create instances" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:11.932127Z", "iopub.status.busy": "2021-01-05T21:24:11.931467Z", "iopub.status.idle": "2021-01-05T21:24:11.934077Z", "shell.execute_reply": "2021-01-05T21:24:11.933666Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Person(id='42', last_name='smith', first_name=['Joe', 'Bob'], age=43, living=None, knows=[])\n" ] } ], "source": [ "# Generate a person\n", "joe_smith = module.Person(id=\"42\", last_name=\"smith\", first_name=['Joe', 'Bob'], age=43)\n", "print(joe_smith)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Instances can be combined with the JSON-LD context to generate RDF" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:11.938396Z", "iopub.status.busy": "2021-01-05T21:24:11.937895Z", "iopub.status.idle": "2021-01-05T21:24:11.951167Z", "shell.execute_reply": "2021-01-05T21:24:11.951547Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"id\": \"42\",\n", " \"last_name\": \"smith\",\n", " \"first_name\": [\n", " \"Joe\",\n", " \"Bob\"\n", " ],\n", " \"age\": 43,\n", " \"living\": null,\n", " \"knows\": [],\n", " \"type\": \"Person\",\n", " \"@context\": {\n", " \"type\": \"@type\",\n", " \"foaf\": \"http://xmlns.com/foaf/0.1/\",\n", " \"samp\": \"http://example.org/model/\",\n", " \"xsd\": \"http://www.w3.org/2001/XMLSchema#\",\n", " \"@vocab\": \"http://example.org/model/\",\n", " \"age\": {\n", " \"@type\": \"xsd:integer\",\n", " \"@id\": \"foaf:age\"\n", " },\n", " \"first_name\": {\n", " \"@id\": \"foaf:firstName\"\n", " },\n", " \"id\": \"@id\",\n", " \"knows\": {\n", " 
\"@type\": \"@id\",\n", " \"@id\": \"foaf:knows\"\n", " },\n", " \"last_name\": {\n", " \"@id\": \"foaf:lastName\"\n", " },\n", " \"living\": {\n", " \"@type\": \"xsd:boolean\"\n", " },\n", " \"@base\": \"http://example.org/people/\"\n", " }\n", "}\n", "@prefix : .\n", "@prefix foaf: .\n", "@prefix samp: .\n", "@prefix xsd: .\n", "\n", " a samp:Person ;\n", " foaf:age 43 ;\n", " foaf:firstName \"Bob\",\n", " \"Joe\" ;\n", " foaf:lastName \"smith\" .\n", "\n", "\n" ] } ], "source": [ "# Add the context and turn it into RDF\n", "jsonld = as_json(yaml_to_json(joe_smith, cntxt))\n", "print(jsonld)\n", "g = Graph()\n", "g.parse(data=jsonld, format=\"json-ld\")\n", "# Graph.serialize returns bytes on rdflib < 6 and str on rdflib >= 6; accept both\n", "turtle = g.serialize(format=\"turtle\")\n", "print(turtle.decode() if isinstance(turtle, bytes) else turtle)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The model can be turned into ShEx" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:11.956945Z", "iopub.status.busy": "2021-01-05T21:24:11.956495Z", "iopub.status.idle": "2021-01-05T21:24:12.049544Z", "shell.execute_reply": "2021-01-05T21:24:12.049910Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BASE \n", "PREFIX rdf: \n", "PREFIX xsd: \n", "PREFIX foaf: \n", "\n", "\n", " xsd:string\n", "\n", " xsd:integer\n", "\n", " xsd:boolean\n", "\n", " CLOSED {\n", " ( $ ( & ;\n", " rdf:type [ ] ? ;\n", " @ +\n", " ) ;\n", " rdf:type [ ]\n", " )\n", "}\n", "\n", " (\n", " CLOSED {\n", " ( $ ( foaf:firstName @ * ;\n", " foaf:lastName @ ;\n", " foaf:age @ ? ;\n", " @ ? 
;\n", " foaf:knows @ *\n", " ) ;\n", " rdf:type [ ]\n", " )\n", " } OR @\n", ")\n", "\n", "\n", "\n" ] } ], "source": [ "shex = ShExGenerator(yaml).serialize(collections=False)\n", "print(shex)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### The ShEx can then be used to validate RDF" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:12.053127Z", "iopub.status.busy": "2021-01-05T21:24:12.052678Z", "iopub.status.idle": "2021-01-05T21:24:12.162974Z", "shell.execute_reply": "2021-01-05T21:24:12.163311Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Conforms\n" ] } ], "source": [ "from pyshex.evaluate import evaluate\n", "r = evaluate(g, shex,\n", " start=\"http://example.org/model/Person\",\n", " focus=\"http://example.org/people/42\")\n", "print(\"Conforms\" if r[0] else r[1])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2021-01-05T21:24:12.196059Z", "iopub.status.busy": "2021-01-05T21:24:12.195557Z", "iopub.status.idle": "2021-01-05T21:24:12.197143Z", "shell.execute_reply": "2021-01-05T21:24:12.197416Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Testing against shape http://example.org/model/FriendlyPerson\n", " No matching triples found for predicate samp:knows\n" ] } ], "source": [ "r = evaluate(g, shex,\n", " start=\"http://example.org/model/FriendlyPerson\",\n", " focus=\"http://example.org/people/42\")\n", "print(\"Conforms\" if r[0] else r[1])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", 
"version": "3.9.1" } }, "nbformat": 4, "nbformat_minor": 2 }