In [1]:
# what biolink:category does my identifier represent
# how to find the predicates used for gene to disease mappings


In [14]:
from linkml_runtime.utils.schemaview import SchemaView
import requests 
from pprint import pprint
# note you can also use a path on a local filesystem
view = SchemaView("https://raw.githubusercontent.com/biolink/biolink-model/master/biolink-model.yaml")

In [15]:
# what biolink:category does my identifier represent?
# id_prefixes
# mappings

element = view.get_element('disease')
print(type(element))

element = view.get_element('phenotype of')
print(element.name)


<class 'linkml_runtime.linkml_model.meta.ClassDefinition'>
phenotype of


In [16]:
# find inverses of a predicate
print("inverse is: " + view.inverse(element.name))

inverse is: has phenotype


In [17]:
# id_prefixes
prefixed_categories = view.get_elements_applicable_by_identifier("DOID:4")
print(prefixed_categories)

['disease']


In [18]:
# mappings 

mapped_categories = view.get_category_by_mapping('SO:0001583')
print(mapped_categories)


['is missense variant of']


In [21]:
# object = 'gene'
# object = 'disease'
# object = 'phenotypic feature'
object = 'sequence variant'

query_prefix = f'https://www.ebi.ac.uk/ols/api/ontologies/_ontology/terms/'
mappings = view.get_mappings(object)
if len(mappings) == 0:
 print("no exact mappings found for: " + object)
for exact_mapping in mappings.get('exact'):
 url = query_prefix.replace("_ontology", exact_mapping.split(":")[0]) + "http%253A%252F%252Fpurl.obolibrary.org%252Fobo%252F" + exact_mapping.replace(":", "_")
 response = requests.get(url)
 if response.status_code == 200:
 print(exact_mapping)
 element = view.get_element(object)
 if element.description is None:
 print("biolink description is empty")
 else:
 print("description: " + element.description +"\n")
 if response.json().get('description') is None:
 print ("no definition found via OLS")
 else: 
 for description in response.json().get('description'):
 print("OLS description: " + description)
 else:
 print(exact_mapping + ": can't find any matching terms in OLS that don't return 404 errors")
 

GENO:0000002
description: An allele that varies in its sequence from what is considered the reference allele at that locus.

OLS description: An allele that varies in it sequence from what is considered the reference or canonical sequence at that location.
WIKIDATA:Q15304597: can't find any matching terms in OLS that don't return 404 errors
SIO:010277: can't find any matching terms in OLS that don't return 404 errors
VMC:Allele: can't find any matching terms in OLS that don't return 404 errors
SO:0001059
description: An allele that varies in its sequence from what is considered the reference allele at that locus.

OLS description: A sequence_alteration is a sequence_feature whose extent is the deviation from another sequence.


In [10]:
# is my element a mixin?

e = view.get_element('gene or gene product')
view.is_mixin(e.name)

True

In [22]:
# view poly hierarchy - a gene is a chemical and biological entity

ancestors = view.class_ancestors('gene')
for a in ancestors:
 print(a)


gene
gene or gene product
genomic entity
chemical entity or gene or gene product
physical essence
ontology class
biological entity
named thing
entity
physical essence or occurrent
thing with taxon
macromolecular machine mixin


In [23]:
# how to find the predicates used for gene to disease mappings
# association: 
# exact_mappings:
# - OBAN:association
# - rdf:Statement
# - owl:Axiom
# check out the biolink-model onion again

associations = [c for c in view.all_classes().keys() if view.is_relationship(c)][0:100]
print(len(associations))
for a in associations:
 if a.startswith('gene'):
 print(a)


72
gene to gene association
gene to gene homology association
gene to gene coexpression association
gene to phenotypic feature association
gene to disease association
gene as a model of disease association
gene has variant that contributes to disease association
gene to expression site association
gene to go term association
gene to gene product relationship
gene regulatory relationship


In [26]:
for association in associations:
 domain_element = view.get_element(view.induced_slot('subject', association).range)
 range_element = view.get_element(view.induced_slot('object', association).range)
 
 if 'gene' in view.class_ancestors(domain_element.name) and 'disease' in view.class_ancestors(range_element.name):
 print(association)
 #print(view.induced_slot('subject', association))
 if 'gene or gene product' in view.class_ancestors(domain_element.name) and 'disease' in view.class_ancestors(range_element.name):
 print(association)
 print(view.induced_slot('subject', association))


gene to disease association
SlotDefinition(name='subject', id_prefixes=[], definition_uri=None, aliases=[], local_names={'ga4gh': LocalName(local_name_source='ga4gh', local_name_value='annotation subject'), 'neo4j': LocalName(local_name_source='neo4j', local_name_value='node with outgoing relationship')}, conforms_to=None, mappings=[], exact_mappings=['owl:annotatedSource', 'OBAN:association_has_subject'], close_mappings=[], related_mappings=[], narrow_mappings=[], broad_mappings=[], extensions={}, annotations={}, description='gene in which variation is correlated with the disease, may be protective or causative or associative, or as a model', alt_descriptions={}, title=None, deprecated=None, todos=[], notes=[], comments=[], examples=[], in_subset=[], from_schema='https://w3id.org/biolink/biolink-model', imported_from=None, see_also=[], deprecated_element_has_exact_replacement=None, deprecated_element_has_possible_replacement=None, is_a='association slot', abstract=None, mixin=None, mi

In [None]:
# find predicates for those associations
# at this point, navigating the online doc might be easiest if you just want answers. 
# programatically, we can get the predicates that have equivalent domain and range constraints to find which 
# coudl be used for associations above.
