{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Basic querying of GO\n", "\n", "See https://github.com/geneontology/helpdesk/issues/22\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "## Create an ontology factory and use it to make an ontology object, using the handle 'go'\n", "## This will create an ontology object using remote services\n", "from ontobio.ontol_factory import OntologyFactory\n", "ofa = OntologyFactory()\n", "ont = ofa.create('go')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## Create an association factory to get pombase GO annotations\n", "## (this uses an 'eager' method, where all annotations are fetched from services in advance)\n", "from ontobio.assoc_factory import AssociationSetFactory\n", "\n", "## Taxon whose annotations are loaded; change this to query another species\n", "POMBE_TAXON = 'NCBITaxon:4896'\n", "afactory = AssociationSetFactory()\n", "aset = afactory.create(ontology=ont, subject_category='gene', object_category='function', taxon=POMBE_TAXON)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Annotation: GO:1903467 'negative regulation of mitotic DNA replication initiation'\n", " Annotation: GO:0004693 'cyclin-dependent protein serine/threonine kinase activity'\n", " Annotation: GO:0072686 'mitotic spindle'\n", " Annotation: GO:0005515 'protein binding'\n", " Annotation: GO:0031138 'negative regulation of conjugation with cellular fusion'\n", " Annotation: GO:1905785 'negative regulation of anaphase-promoting complex-dependent catabolic process'\n", " Annotation: GO:0010389 'regulation of G2/M transition 
of mitotic cell cycle'\n", " Annotation: GO:0005829 'cytosol'\n", " Annotation: GO:0031031 'positive regulation of septation initiation signaling'\n", " Annotation: GO:0000775 'chromosome, centromeric region'\n", " Annotation: GO:1900087 'positive regulation of G1/S transition of mitotic cell cycle'\n", " Annotation: GO:0072434 'signal transduction involved in mitotic G2 DNA damage checkpoint'\n", " Annotation: GO:0005737 'cytoplasm'\n", " Annotation: GO:1904537 'negative regulation of mitotic telomere tethering at nuclear periphery'\n", " Annotation: GO:0007089 'traversing start control point of mitotic cell cycle'\n", " Annotation: GO:0072435 'response to mitotic G2 DNA damage checkpoint signaling'\n", " Annotation: GO:1990820 'response to mitotic DNA integrity checkpoint signaling'\n", " Annotation: GO:0010971 'positive regulation of G2/M transition of mitotic cell cycle'\n", " Annotation: GO:0097472 'cyclin-dependent protein kinase activity'\n", " Annotation: GO:0004672 'protein kinase activity'\n", " Annotation: GO:1905168 'positive regulation of double-strand break repair via homologous recombination'\n", " Annotation: GO:1990023 'mitotic spindle midzone'\n", " Annotation: GO:1903465 'positive regulation of mitotic cell cycle DNA replication'\n", " Annotation: GO:0072429 'response to intra-S DNA damage checkpoint signaling'\n", " Annotation: GO:0004674 'protein serine/threonine kinase activity'\n", " Annotation: GO:0005634 'nucleus'\n", " Annotation: GO:1902845 'negative regulation of mitotic spindle elongation'\n", " Annotation: GO:0001100 'negative regulation of exit from mitosis'\n", " Annotation: GO:0005524 'ATP binding'\n", " Annotation: GO:1903380 'positive regulation of mitotic chromosome condensation'\n", " Annotation: GO:0044732 'mitotic spindle pole body'\n", " Annotation: GO:0045842 'positive regulation of mitotic metaphase/anaphase transition'\n", " Annotation: GO:0000307 'cyclin-dependent protein kinase holoenzyme complex'\n", " Annotation: 
GO:0000790 'nuclear chromatin'\n", " Annotation: GO:1902424 'negative regulation of attachment of mitotic spindle microtubules to kinetochore'\n", " Annotation: GO:2001033 'negative regulation of double-strand break repair via nonhomologous end joining'\n", " Annotation: GO:0098783 'correction of merotelic kinetochore attachment, mitotic'\n", " Annotation: GO:0051445 'regulation of meiotic cell cycle'\n", " Annotation: GO:0035974 'meiotic spindle pole body'\n", " Annotation: GO:0008361 'regulation of cell size'\n" ] } ], "source": [ "## Change this to your gene of interest\n", "CDC2 = 'PomBase:SPBC11B10.09'\n", "\n", "## Write id-labels for direct annotations to CDC2\n", "direct_anns = aset.annotations(CDC2)\n", "for term_id in direct_anns:\n", "    print(\" Annotation: {id} '{label}'\".format(id=term_id, label=ont.label(term_id)))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## TODO: Annotation extensions\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## Get ancestors of all direct annotated terms, following is-a and part-of\n", "inferred_anns = ont.traverse_nodes(direct_anns, relations=['subClassOf', 'BFO:0000050'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Simple visualization" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## Render using graphviz, highlighting directly annotated terms\n", "import os\n", "from ontobio.io.ontol_renderers import GraphRenderer\n", "\n", "png_path = 'output/cdc2.png'\n", "## Make sure the target directory exists before rendering (a fresh checkout may not have it)\n", "os.makedirs(os.path.dirname(png_path), exist_ok=True)\n", "\n", "renderer = GraphRenderer.create('png')\n", "renderer.outfile = png_path\n", "renderer.write_subgraph(ont, inferred_anns, query_ids=direct_anns)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "![img](output/cdc2.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Grouping annotations by slims or root nodes" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { 
"data": { "text/plain": [ "['GO:0003674', 'GO:0008150', 'GO:0005575']" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Get 3 GO ontology roots\n", "## (single-element unpacking fails loudly unless each search returns exactly one match)\n", "[mf] = ont.search('molecular_function')\n", "[bp] = ont.search('biological_process')\n", "[cc] = ont.search('cellular_component')\n", "roots = [mf, bp, cc]\n", "roots" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## Create a mapping of all nodes to the ontology root\n", "## (relations is passed as a list of predicate IDs, like every other call in this notebook)\n", "rootmap = ont.create_slim_mapping(subset_nodes=roots, relations=['subClassOf'])" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GO:0003674 molecular_function\n", " GO:0097472 cyclin-dependent protein kinase activity\n", " GO:0005515 protein binding\n", " GO:0005524 ATP binding\n", " GO:0004674 protein serine/threonine kinase activity\n", " GO:0004693 cyclin-dependent protein serine/threonine kinase activity\n", " GO:0004672 protein kinase activity\n", "GO:0008150 biological_process\n", " GO:1905168 positive regulation of double-strand break repair via homologous recombination\n", " GO:1903465 positive regulation of mitotic cell cycle DNA replication\n", " GO:0010971 positive regulation of G2/M transition of mitotic cell cycle\n", " GO:1903467 negative regulation of mitotic DNA replication initiation\n", " GO:0045842 positive regulation of mitotic metaphase/anaphase transition\n", " GO:0031031 positive regulation of septation initiation signaling\n", " GO:1902424 negative regulation of attachment of mitotic spindle microtubules to kinetochore\n", " GO:1900087 positive regulation of G1/S transition of mitotic cell cycle\n", " GO:1990820 response to mitotic DNA integrity checkpoint signaling\n", " GO:0007089 traversing start control point of mitotic cell cycle\n", " GO:0072435 response to mitotic G2 DNA damage checkpoint signaling\n", " GO:0051445 
regulation of meiotic cell cycle\n", " GO:2001033 negative regulation of double-strand break repair via nonhomologous end joining\n", " GO:0008361 regulation of cell size\n", " GO:1902845 negative regulation of mitotic spindle elongation\n", " GO:0010389 regulation of G2/M transition of mitotic cell cycle\n", " GO:0001100 negative regulation of exit from mitosis\n", " GO:1903380 positive regulation of mitotic chromosome condensation\n", " GO:0072429 response to intra-S DNA damage checkpoint signaling\n", " GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint\n", " GO:0098783 correction of merotelic kinetochore attachment, mitotic\n", " GO:0031138 negative regulation of conjugation with cellular fusion\n", " GO:1904537 negative regulation of mitotic telomere tethering at nuclear periphery\n", " GO:1905785 negative regulation of anaphase-promoting complex-dependent catabolic process\n", "GO:0005575 cellular_component\n", " GO:0000775 chromosome, centromeric region\n", " GO:0044732 mitotic spindle pole body\n", " GO:0035974 meiotic spindle pole body\n", " GO:0005829 cytosol\n", " GO:0005737 cytoplasm\n", " GO:0005634 nucleus\n", " GO:0000790 nuclear chromatin\n", " GO:0000307 cyclin-dependent protein kinase holoenzyme complex\n", " GO:0072686 mitotic spindle\n", " GO:1990023 mitotic spindle midzone\n" ] } ], "source": [ "## Define a simple formatter that breaks annotations into groups/slims\n", "def print_by_group(group_nodes, anns, nodemap):\n", "    \"\"\"Print annotated terms under each group node they map to.\n", "\n", "    group_nodes -- iterable of group term IDs (e.g. ontology roots or slim terms)\n", "    anns        -- iterable of annotated term IDs\n", "    nodemap     -- dict: term ID -> collection of group term IDs it maps to\n", "\n", "    A group heading is printed only when at least one annotation maps to it;\n", "    an annotation that maps to several groups is listed under each of them.\n", "    Labels are looked up in the notebook-level `ont` object.\n", "    \"\"\"\n", "    # anns is scanned once per group, so materialise it in case it is a one-shot iterator\n", "    anns = list(anns)\n", "    for group_id in group_nodes:\n", "        # .get(): a term missing from the mapping belongs to no group (no KeyError)\n", "        grouped = [term_id for term_id in anns if group_id in nodemap.get(term_id, ())]\n", "        if grouped:\n", "            print('{} {}'.format(group_id, ont.label(group_id)))\n", "            for term_id in grouped:\n", "                print(' {} {}'.format(term_id, ont.label(term_id)))\n", "\n", "## Print all annotations to CDC2, grouped by ontology\n", "print_by_group(roots, direct_anns, rootmap)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Removing redundant links\n", "\n", "Note in the report above we see some redundant 
annotations - nucleus and nuclear chromatin for example\n", "\n", "Next we'll filter these out. We use a simple model of redundancy, using minimal relations, and ignoring\n", "evidence" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Reduced 40 annotations -> 32 nonredundant annotations\n" ] } ], "source": [ "## Restrict the ontology to is-a and part-of edges (a go-basic style view)\n", "basic_relations = ['subClassOf', 'BFO:0000050']\n", "basic_ont = ont.subontology(relations=basic_relations)\n", "\n", "## Keep only the non-redundant subset of the direct annotations\n", "nr_anns = basic_ont.filter_redundant(direct_anns)\n", "\n", "print(\n", "    \"Reduced {} annotations -> {} nonredundant annotations\".format(\n", "        len(direct_anns), len(nr_anns)\n", "    )\n", ")" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GO:0003674 molecular_function\n", " GO:0005515 protein binding\n", " GO:0005524 ATP binding\n", " GO:0004693 cyclin-dependent protein serine/threonine kinase activity\n", "GO:0008150 biological_process\n", " GO:1905168 positive regulation of double-strand break repair via homologous recombination\n", " GO:1903465 positive regulation of mitotic cell cycle DNA replication\n", " GO:1903467 negative regulation of mitotic DNA replication initiation\n", " GO:0010971 positive regulation of G2/M transition of mitotic cell cycle\n", " GO:0045842 positive regulation of mitotic metaphase/anaphase transition\n", " GO:0031031 positive regulation of septation initiation signaling\n", " GO:1902424 negative regulation of attachment of mitotic spindle microtubules to kinetochore\n", " GO:1990820 response to mitotic DNA integrity checkpoint signaling\n", " GO:0007089 traversing start control point of mitotic cell cycle\n", " GO:0072435 response to mitotic G2 DNA damage checkpoint signaling\n", " GO:0051445 regulation of meiotic cell cycle\n", " GO:2001033 negative regulation of double-strand break repair via nonhomologous end joining\n", " GO:0008361 regulation 
of cell size\n", " GO:1902845 negative regulation of mitotic spindle elongation\n", " GO:0001100 negative regulation of exit from mitosis\n", " GO:1903380 positive regulation of mitotic chromosome condensation\n", " GO:0072429 response to intra-S DNA damage checkpoint signaling\n", " GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint\n", " GO:0098783 correction of merotelic kinetochore attachment, mitotic\n", " GO:0031138 negative regulation of conjugation with cellular fusion\n", " GO:1904537 negative regulation of mitotic telomere tethering at nuclear periphery\n", " GO:1905785 negative regulation of anaphase-promoting complex-dependent catabolic process\n", "GO:0005575 cellular_component\n", " GO:0000775 chromosome, centromeric region\n", " GO:0044732 mitotic spindle pole body\n", " GO:0035974 meiotic spindle pole body\n", " GO:0005829 cytosol\n", " GO:0000790 nuclear chromatin\n", " GO:0000307 cyclin-dependent protein kinase holoenzyme complex\n", " GO:1990023 mitotic spindle midzone\n" ] } ], "source": [ "## Same grouped report as before, now limited to the non-redundant annotations\n", "print_by_group(group_nodes=roots, anns=nr_anns, nodemap=rootmap)" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## Terms that belong to the 'goslim_pombe' subset\n", "slim_nodes = ont.extract_subset('goslim_pombe')" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": true }, "outputs": [], "source": [ "## Map terms onto the 'goslim_pombe' subset, following is-a and part-of\n", "slim_map = ont.create_slim_mapping(\n", "    subset='goslim_pombe',\n", "    relations=['subClassOf', 'BFO:0000050'],\n", ")" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "GO:1901990 regulation of mitotic cell cycle phase transition\n", " GO:0010971 positive regulation of G2/M transition of mitotic cell cycle\n", " GO:0045842 positive regulation of mitotic metaphase/anaphase transition\n", " GO:0031031 positive regulation of septation initiation signaling\n", " GO:0007089 traversing start control point of mitotic 
cell cycle\n", " GO:0001100 negative regulation of exit from mitosis\n", " GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint\n", "GO:0000070 mitotic sister chromatid segregation\n", " GO:0098783 correction of merotelic kinetochore attachment, mitotic\n", "GO:0008150 biological_process\n", " GO:1905168 positive regulation of double-strand break repair via homologous recombination\n", " GO:1903465 positive regulation of mitotic cell cycle DNA replication\n", " GO:1903467 negative regulation of mitotic DNA replication initiation\n", " GO:0010971 positive regulation of G2/M transition of mitotic cell cycle\n", " GO:0045842 positive regulation of mitotic metaphase/anaphase transition\n", " GO:0031031 positive regulation of septation initiation signaling\n", " GO:1902424 negative regulation of attachment of mitotic spindle microtubules to kinetochore\n", " GO:1990820 response to mitotic DNA integrity checkpoint signaling\n", " GO:0007089 traversing start control point of mitotic cell cycle\n", " GO:0072435 response to mitotic G2 DNA damage checkpoint signaling\n", " GO:0051445 regulation of meiotic cell cycle\n", " GO:2001033 negative regulation of double-strand break repair via nonhomologous end joining\n", " GO:0008361 regulation of cell size\n", " GO:1902845 negative regulation of mitotic spindle elongation\n", " GO:0001100 negative regulation of exit from mitosis\n", " GO:0004693 cyclin-dependent protein serine/threonine kinase activity\n", " GO:1903380 positive regulation of mitotic chromosome condensation\n", " GO:0072429 response to intra-S DNA damage checkpoint signaling\n", " GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint\n", " GO:0098783 correction of merotelic kinetochore attachment, mitotic\n", " GO:0031138 negative regulation of conjugation with cellular fusion\n", " GO:1904537 negative regulation of mitotic telomere tethering at nuclear periphery\n", " GO:1905785 negative regulation of 
anaphase-promoting complex-dependent catabolic process\n", "GO:0023052 signaling\n", " GO:0072434 signal transduction involved in mitotic G2 DNA damage checkpoint\n" ] } ], "source": [ "## Non-redundant annotations once more, this time grouped under the slim terms\n", "print_by_group(group_nodes=slim_nodes, anns=nr_anns, nodemap=slim_map)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.2" } }, "nbformat": 4, "nbformat_minor": 2 }