{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "3e37f9e6",
   "metadata": {},
   "source": [
    "Questions explored in this notebook:\n",
    "\n",
    "- What `biolink:category` does my identifier represent?\n",
    "- How do I find the predicates used for gene to disease mappings?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "169c9e40",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pprint import pprint\n",
    "\n",
    "import requests\n",
    "from linkml_runtime.utils.schemaview import SchemaView\n",
    "\n",
    "# Schema to load; note you can also use a path on a local filesystem\n",
    "BIOLINK_MODEL_URL = \"https://raw.githubusercontent.com/biolink/biolink-model/master/biolink-model.yaml\"\n",
    "\n",
    "# OLS term lookup: the ontology prefix and the term IRI are filled in per mapping\n",
    "# NOTE(review): confirm this OLS endpoint is still the current one\n",
    "OLS_TERM_URL_TEMPLATE = \"https://www.ebi.ac.uk/ols/api/ontologies/{ontology}/terms/{encoded_iri}\"\n",
    "# OBO PURL base (http://purl.obolibrary.org/obo/), percent-encoded twice as the lookup URL carries it\n",
    "OBO_PURL_DOUBLE_ENCODED = \"http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252F\"\n",
    "# seconds to wait on an OLS request before giving up, so a stalled call cannot hang the notebook\n",
    "REQUEST_TIMEOUT_SECONDS = 30\n",
    "\n",
    "view = SchemaView(BIOLINK_MODEL_URL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "54424ac5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'linkml_runtime.linkml_model.meta.ClassDefinition'>\n",
      "phenotype of\n"
     ]
    }
   ],
   "source": [
    "# what biolink:category does my identifier represent?\n",
    "# two routes are shown in the cells below: id_prefixes and mappings\n",
    "\n",
    "# look an element up by name; each lookup gets its own variable so later cells\n",
    "# do not depend on which assignment happened to run last\n",
    "disease_element = view.get_element('disease')\n",
    "print(type(disease_element))\n",
    "\n",
    "predicate_element = view.get_element('phenotype of')\n",
    "print(predicate_element.name)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "61eeb009",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "inverse is: has phenotype\n"
     ]
    }
   ],
   "source": [
    "# find inverses of a predicate\n",
    "# f-string rather than '+': if no inverse is returned, concatenating None to a str raises TypeError\n",
    "print(f\"inverse is: {view.inverse(predicate_element.name)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "91212ad9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['disease']\n"
     ]
    }
   ],
   "source": [
    "# id_prefixes: look up elements by the identifier (CURIE) they are applicable to\n",
    "prefixed_categories = view.get_elements_applicable_by_identifier(\"DOID:4\")\n",
    "print(prefixed_categories)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "49d364e9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['is missense variant of']\n"
     ]
    }
   ],
   "source": [
    "# mappings: look up elements by an external term they are mapped to\n",
    "\n",
    "mapped_categories = view.get_category_by_mapping('SO:0001583')\n",
    "print(mapped_categories)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "a7a0fa3f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "GENO:0000002\n",
      "description: An allele that varies in its sequence from what is considered the reference allele at that locus.\n",
      "\n",
      "OLS description: An allele that varies in it sequence from what is considered the reference or canonical sequence at that location.\n",
      "WIKIDATA:Q15304597: can't find any matching terms in OLS that don't return 404 errors\n",
      "SIO:010277: can't find any matching terms in OLS that don't return 404 errors\n",
      "VMC:Allele: can't find any matching terms in OLS that don't return 404 errors\n",
      "SO:0001059\n",
      "description: An allele that varies in its sequence from what is considered the reference allele at that locus.\n",
      "\n",
      "OLS description: A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence.\n"
     ]
    }
   ],
   "source": [
    "# Compare the Biolink description of an element with the OLS descriptions of its exact mappings.\n",
    "# other names to try: 'gene', 'disease', 'phenotypic feature'\n",
    "element_name = 'sequence variant'\n",
    "\n",
    "# fetched once: the Biolink element does not change from one mapping to the next\n",
    "element = view.get_element(element_name)\n",
    "\n",
    "# guard the list that is actually iterated: a missing or empty 'exact' entry means nothing to look up\n",
    "exact_mappings = view.get_mappings(element_name).get('exact') or []\n",
    "if not exact_mappings:\n",
    "    print(\"no exact mappings found for: \" + element_name)\n",
    "\n",
    "for exact_mapping in exact_mappings:\n",
    "    # the CURIE prefix selects the ontology; the term IRI is built as an OBO PURL (PREFIX_LOCALID)\n",
    "    url = OLS_TERM_URL_TEMPLATE.format(\n",
    "        ontology=exact_mapping.split(\":\")[0],\n",
    "        encoded_iri=OBO_PURL_DOUBLE_ENCODED + exact_mapping.replace(\":\", \"_\"),\n",
    "    )\n",
    "    try:\n",
    "        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)\n",
    "    except requests.RequestException as error:\n",
    "        # a network failure on one mapping should not abort the remaining lookups\n",
    "        print(exact_mapping + \": OLS request failed: \" + str(error))\n",
    "        continue\n",
    "\n",
    "    if response.status_code != 200:\n",
    "        print(exact_mapping + \": can't find any matching terms in OLS that don't return 404 errors\")\n",
    "        continue\n",
    "\n",
    "    print(exact_mapping)\n",
    "    if element.description is None:\n",
    "        print(\"biolink description is empty\")\n",
    "    else:\n",
    "        print(\"description: \" + element.description + \"\\n\")\n",
    "\n",
    "    # parse the response body once and reuse it\n",
    "    ols_descriptions = response.json().get('description')\n",
    "    if ols_descriptions is None:\n",
    "        print(\"no definition found via OLS\")\n",
    "    else:\n",
    "        for ols_description in ols_descriptions:\n",
    "            print(\"OLS description: \" + ols_description)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5fbbc1c1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# is my element a mixin?\n",
    "\n",
    "candidate = view.get_element('gene or gene product')\n",
    "view.is_mixin(candidate.name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "5ab2f5c3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gene\n",
      "gene or gene product\n",
      "genomic entity\n",
      "chemical entity or gene or gene product\n",
      "physical essence\n",
      "ontology class\n",
      "biological entity\n",
      "named thing\n",
      "entity\n",
      "physical essence or occurrent\n",
      "thing with taxon\n",
      "macromolecular machine mixin\n"
     ]
    }
   ],
   "source": [
    "# view poly hierarchy - a gene is a chemical and biological entity\n",
    "\n",
    "for ancestor in view.class_ancestors('gene'):\n",
    "    print(ancestor)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "2f9e0c55",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "72\n",
      "gene to gene association\n",
      "gene to gene homology association\n",
      "gene to gene coexpression association\n",
      "gene to phenotypic feature association\n",
      "gene to disease association\n",
      "gene as a model of disease association\n",
      "gene has variant that contributes to disease association\n",
      "gene to expression site association\n",
      "gene to go term association\n",
      "gene to gene product relationship\n",
      "gene regulatory relationship\n"
     ]
    }
   ],
   "source": [
    "# how to find the predicates used for gene to disease mappings\n",
    "# association:\n",
    "#   exact_mappings:\n",
    "#     - OBAN:association\n",
    "#     - rdf:Statement\n",
    "#     - owl:Axiom\n",
    "# check out the biolink-model onion again\n",
    "\n",
    "# Keep every relationship class: truncating this list would make the gene-to-disease\n",
    "# search in the next cell silently skip associations, and it saves no work here because\n",
    "# is_relationship() is already evaluated for every class.\n",
    "associations = [class_name for class_name in view.all_classes() if view.is_relationship(class_name)]\n",
    "print(len(associations))\n",
    "for association_name in associations:\n",
    "    if association_name.startswith('gene'):\n",
    "        print(association_name)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2779d0d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Associations whose subject range is gene-like and whose object range is a disease.\n",
    "# When this notebook was last run with saved output, the matches were:\n",
    "#   gene to disease association\n",
    "#   gene as a model of disease association\n",
    "#   gene has variant that contributes to disease association\n",
    "\n",
    "# One membership test covers both names, so an association is reported once even when\n",
    "# its subject range has both 'gene' and 'gene or gene product' among its ancestors.\n",
    "GENE_LIKE_CLASSES = {'gene', 'gene or gene product'}\n",
    "\n",
    "for association in associations:\n",
    "    subject_slot = view.induced_slot('subject', association)\n",
    "    object_slot = view.induced_slot('object', association)\n",
    "    domain_element = view.get_element(subject_slot.range)\n",
    "    range_element = view.get_element(object_slot.range)\n",
    "    if domain_element is None or range_element is None:\n",
    "        # no resolvable subject/object range, so there is nothing to compare\n",
    "        continue\n",
    "\n",
    "    # the object range is only inspected once the subject has matched, as before\n",
    "    if GENE_LIKE_CLASSES.isdisjoint(view.class_ancestors(domain_element.name)):\n",
    "        continue\n",
    "    if 'disease' not in view.class_ancestors(range_element.name):\n",
    "        continue\n",
    "\n",
    "    print(association)\n",
    "    # print the informative fields rather than the full SlotDefinition repr\n",
    "    print(\"  subject range: \" + str(subject_slot.range))\n",
    "    print(\"  subject description: \" + str(subject_slot.description))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "04093dc5",
   "metadata": {},
   "source": [
    "Next step: find predicates for those associations.\n",
    "\n",
    "At this point, navigating the online documentation might be easiest if you just want answers. Programmatically, we can get the predicates that have equivalent domain and range constraints to find which could be used for the associations above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6c8ade22",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}