{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Explore Mappings (xrefs) from Uberon\n", "\n", "Draft notebook showing the xref graph functionality of OntoBio" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "## First fetch ontology\n", "from ontobio.ontol_factory import OntologyFactory\n", "\n", "ofactory = OntologyFactory()\n", "ont = ofactory.create(\"uberon\") ## Connect remotely to Uberon over SPARQL\n", "##\n", "## Note: Jupyter may show '*' to indicate kernel busy while this is being\n", "## fetched - should only take a few seconds. Wait before proceeding" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'UBERON:0006956'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## select a class\n", "[cls] = ont.search(\"buccal mucosa\")\n", "cls" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['FMA:59785',\n", " 'http://linkedlifedata.com/resource/umls/id/C1578559',\n", " 'UMLS:C1578559',\n", " 'http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C12505',\n", " 'CALOHA:TS-2349',\n", " 'http://www.snomedbrowser.com/Codes/Details/16811007',\n", " 'BTO:0003833']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Get xrefs for a class\n", "ont.xrefs(cls)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "100535" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Get a networkx graph object\n", "xg = ont.xref_graph\n", "len(xg.edges())" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['FMA:59785',\n", " 'UMLS:C1578559',\n", " 'UMLS:C1578559',\n", " 'NCIT:C12505',\n", " 'CALOHA:TS-2349',\n", " 'SCTID:16811007',\n", " 'BTO:0003833']" ] 
}, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Hacky convenience function to deal with URL xrefs\n", "## TODO: use prefixcommons\n", "\n", "def contract_xref(x):\n", "    \"\"\"Return xref `x` with known URL namespaces contracted to CURIE prefixes.\n", "\n", "    Only the UMLS, NCIT and SCTID namespaces listed below are rewritten;\n", "    any other string is returned unchanged.\n", "    \"\"\"\n", "    url_to_prefix = (\n", "        ('http://linkedlifedata.com/resource/umls/id/', 'UMLS:'),\n", "        ('http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#', 'NCIT:'),\n", "        ('http://www.snomedbrowser.com/Codes/Details/', 'SCTID:'),\n", "    )\n", "    contracted = x\n", "    for url, prefix in url_to_prefix:\n", "        contracted = contracted.replace(url, prefix)\n", "    return contracted\n", "\n", "[contract_xref(x) for x in ont.xrefs(cls)]" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "## Prepare to make a DataFrame\n", "## Each item is a class\n", "## Each column is a database\n", "import pandas as pd\n", "\n", "items = []\n", "for class_id in ont.nodes():\n", "    row = dict(id=class_id, label=ont.label(class_id))\n", "    for xref in ont.xrefs(class_id):\n", "        parts = contract_xref(xref).split(\":\")\n", "        ## Only xrefs with exactly one ':' become a column. Note that an\n", "        ## uncontracted URL such as 'http://host/path' also has exactly one\n", "        ## ':' and therefore lands in an 'http' (or 'https') column.\n", "        if len(parts) != 2:\n", "            continue\n", "        prefix, local_id = parts\n", "        ## A later xref with the same prefix overwrites an earlier one\n", "        row[prefix] = local_id\n", "    items.append(row)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | \n", " | AAO | \n", "ABA | \n", "AEO | \n", "AEO_RETIRED | \n", "ANISEED | \n", "BAMS | \n", "BILA | \n", "BILS | \n", "BM | \n", "BSA | \n", "... | \n", "XtroDO | \n", "ZFA | \n", "ZFA_RETIRED | \n", "ZFS | \n", "galen | \n", "http | \n", "https | \n", "ncithesaurus | \n", "nodeID | \n", "span | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | \n", "label | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:3010014 | \n", "inguinal glands | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:2000723 | \n", "obsolete slow muscle cell somite 5 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | 0000723 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0000113 | \n", "post-juvenile adult stage | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | 0000113 | \n", "\n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | 0000044 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0006590 | \n", "remnant of embryonic structure | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0024559 | \n", "obsolete predominantly gray regional part of habenula | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:2001409 | \n", "infraorbital 4 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | 0001409 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0005587 | \n", "rhombomere 7 roof plate | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | 0000951 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:4300088 | \n", "metapterygium bone | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0023317 | \n", "obsolete regional part of ventral cochlear nucleus | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0002929 | \n", "dentate gyrus pyramidal layer | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0000011 | \n", "parasympathetic nervous system | \n", "0010488 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | 0001575 | \n", "\n", " | \n", " | \n", " | //en.wikipedia.org/wiki/Parasympathetic_nervou... | \n", "\n", " | \n", " | \n", " | \n", " |
UBERON:0000325 | \n", "gastric gland | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0010884 | \n", "forelimb bone pre-cartilage condensation | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0002693 | \n", "occipitotemporal sulcus | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | ots | \n", "\n", " | \n", " | Tel-Cx-OTS | \n", "\n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | //braininfo.rprc.washington.edu/centraldirecto... | \n", "\n", " | \n", " | \n", " | \n", " |
UBERON:0001536 | \n", "left common carotid artery plus branches | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | //en.wikipedia.org/wiki/Left_common_carotid_ar... | \n", "\n", " | \n", " | \n", " | \n", " |
UBERON:0004773 | \n", "superior eyelid tarsus | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | //en.wikipedia.org/wiki/Tarsus_%28eyelids%29 | \n", "\n", " | \n", " | \n", " | \n", " |
UBERON:0009138 | \n", "right common cardinal vein | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0001440 | \n", "forelimb skeleton | \n", "0000202 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0006813 | \n", "nasal skeleton | \n", "0000320 | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
UBERON:0005373 | \n", "spinal cord dorsal column | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | dc | \n", "\n", " | \n", " | \n", " | \n", " | ... | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | //braininfo.rprc.washington.edu/centraldirecto... | \n", "\n", " | \n", " | \n", " | \n", " |
20 rows × 98 columns
\n", "