## Basic querying of GO

See https://github.com/geneontology/helpdesk/issues/22



In [1]:
## Create an ontology factory and use it to make an ontology object, using the handle 'go'.
## This will create an ontology object using remote services.
## `ont` is the ontology object that the later cells query (labels, search, traversal, slim mappings).
from ontobio.ontol_factory import OntologyFactory
ofa = OntologyFactory()
ont = ofa.create('go')



In [2]:
## Build an association factory and use it to load the PomBase GO annotations.
## The factory works eagerly: every annotation is fetched from services up front.
from ontobio.assoc_factory import AssociationSetFactory
afactory = AssociationSetFactory()
aset = afactory.create(
    ontology=ont,
    subject_category='gene',
    object_category='function',
    taxon='NCBITaxon:4896',
)

In [3]:
## Gene to inspect -- replace with the ID of your gene of interest
CDC2 = 'PomBase:SPBC11B10.09'

## Print the id and label of each term directly annotated to CDC2
direct_anns = aset.annotations(CDC2)
annotation_line = " Annotation: {id} '{label}'"
for term_id in direct_anns:
    print(annotation_line.format(id=term_id, label=ont.label(term_id)))

 Annotation: GO:1903467 'negative regulation of mitotic DNA replication initiation'
 Annotation: GO:0004693 'cyclin-dependent protein serine/threonine kinase activity'
 Annotation: GO:0072686 'mitotic spindle'
 Annotation: GO:0005515 'protein binding'
 Annotation: GO:0031138 'negative regulation of conjugation with cellular fusion'
 Annotation: GO:1905785 'negative regulation of anaphase-promoting complex-dependent catabolic process'
 Annotation: GO:0010389 'regulation of G2/M transition of mitotic cell cycle'
 Annotation: GO:0005829 'cytosol'
 Annotation: GO:0031031 'positive regulation of septation initiation signaling'
 Annotation: GO:0000775 'chromosome, centromeric region'
 Annotation: GO:1900087 'positive regulation of G1/S transition of mitotic cell cycle'
 Annotation: GO:0072434 'signal transduction involved in mitotic G2 DNA damage checkpoint'
 Annotation: GO:0005737 'cytoplasm'
 Annotation: GO:1904537 'negative regulation of mitotic telomere tethering at nuclear periphery'
 A

In [None]:
## TODO: Annotation extensions


In [13]:
## Get ancestors of all directly annotated terms, following is-a ('subClassOf')
## and part-of ('BFO:0000050') relations.
## The result is passed to the graph renderer below as the set of nodes to draw.
inferred_anns = ont.traverse_nodes(direct_anns, relations=['subClassOf', 'BFO:0000050'])

## Simple visualization

In [15]:
## Render using graphviz, highlighting directly annotated terms
import os
from ontobio.io.ontol_renderers import GraphRenderer

w = GraphRenderer.create('png')
w.outfile = 'output/cdc2.png'
## Make sure the target directory exists, so the render does not fail on a
## fresh checkout where 'output/' has not been created yet.
os.makedirs(os.path.dirname(w.outfile), exist_ok=True)
## Draw the subgraph of inferred terms; query_ids marks the directly annotated ones.
w.write_subgraph(ont, inferred_anns, query_ids=direct_anns)


![img](output/cdc2.png)

## Grouping annotations by slims or root nodes

In [36]:
## Get 3 GO ontology roots by searching for each root label.
## Each search result is unpacked as a one-element list, so a label must match exactly one term.
roots = []
for root_label in ('molecular_function', 'biological_process', 'cellular_component'):
    [root_id] = ont.search(root_label)
    roots.append(root_id)
mf, bp, cc = roots
roots

['GO:0003674', 'GO:0008150', 'GO:0005575']

In [23]:
## Create a mapping of all nodes to the ontology root, following is-a ('subClassOf') only.
## The relation is given as a one-element list rather than a bare string, matching the
## list form used by every other `relations=` argument in this notebook.
rootmap = ont.create_slim_mapping(subset_nodes=roots, relations=['subClassOf'])

In [50]:
## Define a simple formatter that breaks annotations into groups/slims
def print_by_group(group_nodes, anns, nodemap, ontology=None):
    """Print annotations bucketed under each group (root or slim) term.

    For every node in ``group_nodes`` that has at least one matching
    annotation, print a header line ``<group id> <group label>`` followed by
    one line `` <term id> <term label>`` per matching annotation. Groups with
    no matching annotations print nothing. An annotation is printed once under
    each group it maps to.

    Parameters
    ----------
    group_nodes : iterable
        Group term ids, reported in the order given.
    anns : iterable
        Annotated term ids to report. It is scanned once per group, so it
        must be re-iterable (e.g. a list or set, not a generator).
    nodemap : dict
        Maps a term id to the collection of group ids it belongs to. Terms
        missing from the map are treated as belonging to no group instead of
        raising ``KeyError``.
    ontology : object, optional
        Object whose ``label(id)`` method supplies display labels. Defaults to
        the notebook-level ``ont``, so existing three-argument calls behave as
        before.
    """
    if ontology is None:
        # Fall back to the notebook-global ontology for backward compatibility.
        ontology = ont
    for r in group_nodes:
        # .get() keeps the report going if an annotated term is absent from the map.
        filtered_anns = [x for x in anns if r in nodemap.get(x, ())]
        if filtered_anns:
            print('{} {}'.format(r, ontology.label(r)))
            for a in filtered_anns:
                print(' {} {}'.format(a, ontology.label(a)))
 
## Print all annotations to CDC2, grouped by ontology
## (roots holds the three GO root ids; rootmap maps each node to its root)
print_by_group(roots, direct_anns, rootmap)

GO:0003674 molecular_function
 GO:0097472 cyclin-dependent protein kinase activity
 GO:0005515 protein binding
 GO:0005524 ATP binding
 GO:0004674 protein serine/threonine kinase activity
 GO:0004693 cyclin-dependent protein serine/threonine kinase activity
 GO:0004672 protein kinase activity
GO:0008150 biological_process
 GO:1905168 positive regulation of double-strand break repair via homologous recombination
 GO:1903465 positive regulation of mitotic cell cycle DNA replication
 GO:0010971 positive regulation of G2/M transition of mitotic cell cycle
 GO:1903467 negative regulation of mitotic DNA replication initiation
 GO:0045842 positive regulation of mitotic metaphase/anaphase transition
 GO:0031031 positive regulation of septation initiation signaling
 GO:1902424 negative regulation of attachment of mitotic spindle microtubules to kinetochore
 GO:1900087 positive regulation of G1/S transition of mitotic cell cycle
 GO:1990820 response to mitotic DNA integrity checkpoint signaling


## Removing redundant links

Note that the report above contains some redundant annotations: for example, both nucleus and nuclear chromatin.

Next we'll filter these out. We use a simple model of redundancy that considers only a minimal set of
relations (is-a and part-of) and ignores evidence.

In [51]:
## Derive a go-basic ontology that keeps only is-a and part-of,
## then filter the redundant terms out of the direct annotations.
basic_relations = ['subClassOf', 'BFO:0000050']
basic_ont = ont.subontology(relations=basic_relations)
nr_anns = basic_ont.filter_redundant(direct_anns)
n_before, n_after = len(direct_anns), len(nr_anns)
print("Reduced {} annotations -> {} nonredundant annotations".format(n_before, n_after))

Reduced 40 annotations -> 32 nonredundant annotations


In [52]:
## Report again, this time non-redundant
## (same grouping by the three GO roots, but over nr_anns instead of direct_anns)
print_by_group(roots, nr_anns, rootmap)

GO:0003674 molecular_function
 GO:0005515 protein binding
 GO:0005524 ATP binding
 GO:0004693 cyclin-dependent protein serine/threonine kinase activity
GO:0008150 biological_process
 GO:1905168 positive regulation of double-strand break repair via homologous recombination
 GO:1903465 positive regulation of mitotic cell cycle DNA replication
 GO:1903467 negative regulation of mitotic DNA replication initiation
 GO:0010971 positive regulation of G2/M transition of mitotic cell cycle
 GO:0045842 positive regulation of mitotic metaphase/anaphase transition
 GO:0031031 positive regulation of septation initiation signaling
 GO:1902424 negative regulation of attachment of mitotic spindle microtubules to kinetochore
 GO:1990820 response to mitotic DNA integrity checkpoint signaling
 GO:0007089 traversing start control point of mitotic cell cycle
 GO:0072435 response to mitotic G2 DNA damage checkpoint signaling
 GO:0051445 regulation of meiotic cell cycle
 GO:2001033 negative regulation of dou

In [58]:
## Extract the 'goslim_pombe' subset from the ontology; the result is used below
## as the list of group nodes for the slim report.
slim_nodes = ont.extract_subset('goslim_pombe')

In [54]:
## Create a slim mapping for the 'goslim_pombe' subset, this time following both
## is-a ('subClassOf') and part-of ('BFO:0000050'); used below as the node map for the slim report.
slim_map = ont.create_slim_mapping(subset='goslim_pombe', relations=['subClassOf', 'BFO:0000050'])

In [60]:
## Report, with breakdown by slim
## (non-redundant annotations grouped under the goslim_pombe nodes via slim_map)
print_by_group(slim_nodes, nr_anns, slim_map)

GO:1901990 regulation of mitotic cell cycle phase transition
 GO:0010971 positive regulation of G2/M transition of mitotic cell cycle
 GO:0045842 positive regulation of mitotic metaphase/anaphase transition
 GO:0031031 positive regulation of septation initiation signaling
 GO:0007089 traversing start control point of mitotic cell cycle
 GO:0001100 negative regulation of exit from mitosis
 GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint
GO:0000070 mitotic sister chromatid segregation
 GO:0098783 correction of merotelic kinetochore attachment, mitotic
GO:0008150 biological_process
 GO:1905168 positive regulation of double-strand break repair via homologous recombination
 GO:1903465 positive regulation of mitotic cell cycle DNA replication
 GO:1903467 negative regulation of mitotic DNA replication initiation
 GO:0010971 positive regulation of G2/M transition of mitotic cell cycle
 GO:0045842 positive regulation of mitotic metaphase/anaphase transition
 GO:00310