{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3e37f9e6",
   "metadata": {},
   "source": [
    "# Exploring the Biolink Model with SchemaView\n",
    "\n",
    "Questions this notebook works through:\n",
    "\n",
    "- What `biolink:category` does my identifier represent?\n",
    "- How do I find the predicates used for gene to disease mappings?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "169c9e40",
   "metadata": {},
   "outputs": [],
   "source": [
    "import requests\n",
    "from linkml_runtime.utils.schemaview import SchemaView\n",
    "\n",
    "# note you can also use a path on a local filesystem\n",
    "view = SchemaView(\"https://raw.githubusercontent.com/biolink/biolink-model/master/biolink-model.yaml\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54424ac5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# what biolink:category does my identifier represent?\n",
    "# id_prefixes\n",
    "# mappings\n",
    "\n",
    "element = view.get_element('disease')\n",
    "print(type(element))\n",
    "\n",
    "# `element` is reused by the next cell to look up the inverse predicate\n",
    "element = view.get_element('phenotype of')\n",
    "print(element.name)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61eeb009",
   "metadata": {},
   "outputs": [],
   "source": [
    "# find inverses of a predicate\n",
    "# formatted rather than concatenated so a missing inverse (None) prints instead of raising TypeError\n",
    "print(f\"inverse is: {view.inverse(element.name)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91212ad9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# id_prefixes\n",
    "prefixed_categories = view.get_elements_applicable_by_identifier(\"DOID:4\")\n",
    "print(prefixed_categories)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49d364e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# mappings\n",
    "\n",
    "mapped_categories = view.get_category_by_mapping('SO:0001583')\n",
    "print(mapped_categories)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a7a0fa3f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare the Biolink description of an element with the OLS description of each exact mapping.\n",
    "# other elements to try: 'gene', 'disease', 'phenotypic feature'\n",
    "element_name = 'sequence variant'\n",
    "\n",
    "# OLS term lookup URL; the term IRI is appended in double URL-encoded form\n",
    "OLS_TERM_URL = 'https://www.ebi.ac.uk/ols/api/ontologies/{ontology}/terms/{iri}'\n",
    "OBO_IRI_PREFIX = 'http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252F'\n",
    "REQUEST_TIMEOUT_SECONDS = 30  # avoid hanging the kernel if OLS does not answer\n",
    "\n",
    "mappings = view.get_mappings(element_name)\n",
    "# the 'exact' entry may be absent or None; treat both as \"no mappings\" instead of iterating None\n",
    "exact_mappings = mappings.get('exact') or []\n",
    "if not exact_mappings:\n",
    "    print(\"no exact mappings found for: \" + element_name)\n",
    "\n",
    "# looked up once, under its own name, so the `element` variable of earlier cells is not overwritten\n",
    "mapped_element = view.get_element(element_name)\n",
    "\n",
    "for exact_mapping in exact_mappings:\n",
    "    ontology = exact_mapping.split(\":\")[0]\n",
    "    term_iri = OBO_IRI_PREFIX + exact_mapping.replace(\":\", \"_\")\n",
    "    url = OLS_TERM_URL.format(ontology=ontology, iri=term_iri)\n",
    "    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)\n",
    "    if response.status_code != 200:\n",
    "        print(exact_mapping + \": can't find any matching terms in OLS that don't return 404 errors\")\n",
    "        continue\n",
    "\n",
    "    print(exact_mapping)\n",
    "    if mapped_element.description is None:\n",
    "        print(\"biolink description is empty\")\n",
    "    else:\n",
    "        print(\"description: \" + mapped_element.description + \"\\n\")\n",
    "\n",
    "    # parse the response body once and reuse it\n",
    "    ols_descriptions = response.json().get('description')\n",
    "    if ols_descriptions is None:\n",
    "        print(\"no definition found via OLS\")\n",
    "    else:\n",
    "        for description in ols_descriptions:\n",
    "            print(\"OLS description: \" + description)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fbbc1c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# is my element a mixin?\n",
    "\n",
    "mixin_candidate = view.get_element('gene or gene product')\n",
    "view.is_mixin(mixin_candidate.name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ab2f5c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# view poly hierarchy - a gene is a chemical and biological entity\n",
    "\n",
    "ancestors = view.class_ancestors('gene')\n",
    "for ancestor in ancestors:\n",
    "    print(ancestor)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f9e0c55",
   "metadata": {},
   "outputs": [],
   "source": [
    "# how to find the predicates used for gene to disease mappings\n",
    "# association:\n",
    "#   exact_mappings:\n",
    "#      - OBAN:association\n",
    "#      - rdf:Statement\n",
    "#      - owl:Axiom\n",
    "# check out the biolink-model onion again\n",
    "\n",
    "# only the first MAX_ASSOCIATIONS relationship classes are kept; raise the cap if the printed count hits it\n",
    "MAX_ASSOCIATIONS = 100\n",
    "\n",
    "associations = [c for c in view.all_classes().keys() if view.is_relationship(c)][:MAX_ASSOCIATIONS]\n",
    "print(len(associations))\n",
    "for association in associations:\n",
    "    if association.startswith('gene'):\n",
    "        print(association)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2779d0d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# List associations whose subject is a gene (or gene product) and whose object is a disease.\n",
    "for association in associations:\n",
    "    subject_slot = view.induced_slot('subject', association)\n",
    "    object_slot = view.induced_slot('object', association)\n",
    "    domain_element = view.get_element(subject_slot.range)\n",
    "    range_element = view.get_element(object_slot.range)\n",
    "    # skip associations whose subject/object range does not resolve to a schema element\n",
    "    if domain_element is None or range_element is None:\n",
    "        continue\n",
    "\n",
    "    if 'disease' not in view.class_ancestors(range_element.name):\n",
    "        continue\n",
    "\n",
    "    domain_ancestors = view.class_ancestors(domain_element.name)\n",
    "    via_gene_or_gene_product = 'gene or gene product' in domain_ancestors\n",
    "    # a single combined check so an association matching both conditions is printed only once\n",
    "    if 'gene' in domain_ancestors or via_gene_or_gene_product:\n",
    "        print(association)\n",
    "    if via_gene_or_gene_product:\n",
    "        print(subject_slot)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "04093dc5",
   "metadata": {},
   "source": [
    "## Find predicates for those associations\n",
    "\n",
    "At this point, navigating the online documentation might be easiest if you just want answers.\n",
    "Programmatically, we can get the predicates that have equivalent domain and range constraints\n",
    "to find which could be used for the associations above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c8ade22",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}