{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Parsing rare list\n", "\n", "https://globalgenes.org/rarelist\n" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.6/site-packages/urllib3/connectionpool.py:858: InsecureRequestWarning: Unverified HTTPS request is being made. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#ssl-warnings\n", " InsecureRequestWarning)\n" ] } ], "source": [ "# Fetch HTML using requests lib and feed to bs4\n", "import requests\n", "from bs4 import BeautifulSoup\n", "from bs4 import NavigableString\n", "\n", "# note their SSL certificate is not verified (verify=False). Be careful:\n", "# the response is not authenticated, so treat its content as untrusted.\n", "# timeout stops a stalled connection from hanging the kernel indefinitely.\n", "result = requests.get(\"https://globalgenes.org/rarelist\", verify=False, timeout=60)\n", "# fail here on an HTTP error instead of parsing an error page as the list\n", "result.raise_for_status()\n", "\n", "soup = BeautifulSoup(result.content, 'html.parser')" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Rare Disease List" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check\n", "soup.title" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# write formatted html to file\n", "# (not used: this is just a useful side effect for exploration)\n", "# 'with' closes the file even if the write raises; explicit UTF-8 keeps the\n", "# output independent of the platform's default encoding\n", "with open('rarelist.html', 'w', encoding='utf-8') as f:\n", "    f.write(soup.prettify())" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Aagenaes syndrome',\n", " 'Aarskog syndrome',\n", " 'Aase Smith syndrome',\n", " 'ABCD syndrome',\n", " 'Abderhalden Kaufmann Lignac syndrome',\n", " 'Abdominal aortic aneurysm',\n", " 'Abdominal chemodectomas with cutaneous angiolipomas',\n", " 'Abdominal cystic lymphangioma',\n", " 'Abdominal obesity metabolic syndrome',\n", " 'Aberrant subclavian artery',\n", " 'Abetalipoproteinemia',\n", " 'Abidi X-linked mental 
retardation syndrome',\n", " 'Ablepharon macrostomia syndrome',\n", " \"Abrikosov's tumor\",\n", " 'Abruzzo Erickson syndrome',\n", " 'Absence of fingerprints congenital milia',\n", " 'Absence of gluteal muscle',\n", " 'Absence of septum pellucidum',\n", " 'Absence of Tibia',\n", " 'Absence of tibia with polydactyly']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# use bs4 to extract names from HTML\n", "\n", "names = []     ## all disease names found, in page order\n", "name2url = {}  ## mapping of names to URLs (only entries that carry a link)\n", "\n", "# the disease entries are read from the <ul> that follows each <h5> heading\n", "for h5 in soup.find_all(\"h5\"):\n", "    ul = h5.find_next_sibling('ul')\n", "    if ul is None:\n", "        # heading with no list after it: nothing to extract\n", "        continue\n", "    for li in ul.find_all('li'):\n", "        if len(li.contents) == 0:\n", "            continue\n", "        first = li.contents[0]\n", "        if isinstance(first, NavigableString):\n", "            # str() drops the NavigableString's reference to the parse tree\n", "            n = str(first)\n", "        else:\n", "            # name is wrapped in a tag (e.g. a link): use the tag's text\n", "            n = first.get_text()\n", "        n = n.strip()\n", "        if not n:\n", "            print('BAD: {}'.format(li))\n", "            continue\n", "        # href=True skips anchors without an href instead of raising KeyError\n", "        link = li.find('a', href=True)\n", "        if link is not None:\n", "            name2url[n] = link['href']\n", "        names.append(n)\n", "\n", "# show the first 20 for sanity checking\n", "names[0:20]" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('Acute disseminated encephalomyelitis', 'http://ulf.org/'),\n", " ('Acute hemorrhagic leukoencephalitis', 'http://ulf.org/'),\n", " ('Adrenoleukodystrophy X-linked', 'http://ulf.org/'),\n", " ('Adrenomyeloneuropathy', 'http://ulf.org/'),\n", " ('Aicardi-Goutieres syndrome', 'http://ulf.org/'),\n", " ('Alexander disease', 'http://ulf.org/'),\n", " ('Alkaptonuria', 'http://www.alkaptonuria.info/'),\n", " ('Alpers syndrome',\n", " 'http://www.umdf.org/site/c.8qKOJ0MvF7LUG/b.7929671/k.BDF0/Home.htm'),\n", " ('Alzheimer disease familial', 'http://www.mitoaction.org/'),\n", " ('Alzheimer disease type 1', 'http://www.mitoaction.org/')]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## sanity check URL mapping\n", "list(name2url.items())[0:10]" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "import csv\n", "\n", "# one row per disease: name <TAB> URL (csv writes a missing URL as an empty field)\n", "# explicit UTF-8 keeps the file independent of the platform's default encoding\n", "with open('rare-list.tsv', 'w', newline='', encoding='utf-8') as tsvfile:\n", "    writer = csv.writer(tsvfile, delimiter='\\t')\n", "    for n in names:\n", "        writer.writerow([n, name2url.get(n)])" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python3.6/site-packages/cachier/mongo_core.py:24: UserWarning: Cachier warning: pymongo was not found. MongoDB cores will not work.\n", " \"Cachier warning: pymongo was not found. MongoDB cores will not work.\")\n" ] } ], "source": [ "## use ontobio lib for fetching ontologies and lexical mapping\n", "from ontobio import OntologyFactory" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "ofa = OntologyFactory()" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "hp = ofa.create('obo:hp')" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "mondo = ofa.create('obo:mondo')" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "from ontobio.lexmap import LexicalMapEngine\n", "lexmap = LexicalMapEngine()\n" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "rare handle: None meta: None" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Quick hack to make a degenerate 'ontology' from the list of names\n", "from ontobio import Ontology\n", "\n", "def ont_from_names(names):\n", "    \"\"\"Build an Ontology with id 'rare' holding one node per entry of names.\n", "\n", "    Each name is passed to add_node as both arguments, so the name doubles\n", "    as the node ID. Returns the populated Ontology.\n", "    \"\"\"\n", "    ont = Ontology(id='rare')\n", "    for n in names:\n", "        ## use name as ID\n", "        ont.add_node(n, n)\n", "    return ont\n", "\n", "rare = ont_from_names(names)\n", "rare" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Aagenaes syndrome',\n", " 'Aarskog 
syndrome',\n", " 'Aase Smith syndrome',\n", " 'ABCD syndrome',\n", " 'Abderhalden Kaufmann Lignac syndrome',\n", " 'Abdominal aortic aneurysm',\n", " 'Abdominal chemodectomas with cutaneous angiolipomas',\n", " 'Abdominal cystic lymphangioma',\n", " 'Abdominal obesity metabolic syndrome',\n", " 'Aberrant subclavian artery']" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## quick inspection\n", "# list() so the slice also works if nodes() returns a non-sliceable view\n", "# (e.g. a networkx 2.x NodeView -- TODO confirm what Ontology.nodes() returns)\n", "list(rare.nodes())[0:10]" ] },
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:root:Incomplete syn: HP:0000991 \"\" hasRelatedSynonym None [] 1.0\n", "WARNING:root:Incomplete syn: HP:0012377 \"\" hasRelatedSynonym None [] 1.0\n", "WARNING:root:Incomplete syn: HP:0000510 \"\" hasRelatedSynonym None [] 1.0\n", "WARNING:root:Ignoring suspicous synonym: UBERON:0002722 \"4\" hasBroadSynonym None ['http://uri.neuinfo.org/nif/nifstd/birnlex_1488', 'NIFSTD:NeuroNames_abbrevSource'] 1.0\n", "WARNING:root:Ignoring suspicous synonym: UBERON:0001715 \"3\" hasBroadSynonym None ['http://uri.neuinfo.org/nif/nifstd/birnlex_1240', 'NIFSTD:NeuroNames_abbrevSource'] 1.0\n" ] } ], "source": [ "## index the 3 ontologies\n", "lexmap.index_ontology(hp)\n", "lexmap.index_ontology(mondo)\n", "lexmap.index_ontology(rare)" ] },
{ "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "## CONFIGURE\n", "## we will map rare to mondo and to hp separately (one pair per target)\n", "lexmap.ontology_pairs = [(rare.id, mondo.id), (rare.id, hp.id)]" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "# align: build the cross-reference graph for the configured ontology pairs\n", "g = lexmap.get_xref_graph()" ] },
{ "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
leftleft_labelrightright_labelleft_match_typeright_match_typeleft_match_valright_match_valscoreleft_simscore...conditional_pr_equivpr_subClassOfpr_superClassOfpr_equivalentTopr_otherleft_novelright_novelleft_consistentright_consistentequiv_clique_size
328711-beta-hydroxylase deficiency11-beta-hydroxylase deficiencyMONDO:0008729congenital adrenal hyperplasia due to 11-beta-...labelhasRelatedSynonym11-beta-hydroxylase deficiency11-Beta-Hydroxylase Deficiency50.01.000000...1.0000000.0615810.0615810.7996540.077184TrueTrueFalseFalse7
219915q13.3 microdeletion syndrome15q13.3 microdeletion syndromeMONDO:0012774chromosome 15q13.3 microdeletion syndromelabelhasExactSynonym15q13.3 microdeletion syndrome15q13.3 microdeletion syndrome90.01.000000...1.0000000.0299690.0299690.9187630.021299TrueTrueFalseFalse6
333917-alpha-hydroxylase deficiency17-alpha-hydroxylase deficiencyMONDO:0008730congenital adrenal hyperplasia due to 17-alpha...labelhasRelatedSynonym17-alpha-hydroxylase deficiency17-Alpha-Hydroxylase Deficiency50.01.000000...1.0000000.0615810.0615810.7996540.077184TrueTrueFalseFalse5
348117-beta hydroxysteroid dehydrogenase 3 deficiency17-beta hydroxysteroid dehydrogenase 3 deficiencyMONDO:000991646,XY disorder of sex development due to 17-be...labelhasExactSynonym17-beta hydroxysteroid dehydrogenase 3 deficiency17-beta-hydroxysteroid dehydrogenase 3 deficiency58.01.000000...1.0000000.2059650.2059650.3923940.195675TrueTrueFalseFalse7
259217q21.31 microdeletion syndrome17q21.31 microdeletion syndromeMONDO:0012496Koolen de Vries syndromelabelhasExactSynonym17q21.31 microdeletion syndrome17q21.31 microdeletion syndrome90.01.000000...0.4736840.1680170.0555540.7495910.026839TrueTrueFalseFalse8
259317q21.31 microdeletion syndrome17q21.31 microdeletion syndromeMONDO:001821617q21.31 microdeletion syndromelabellabel17q21.31 microdeletion syndrome17q21.31 microdeletion syndrome100.01.000000...0.5263160.0516710.1082320.8247340.015363TrueTrueFalseFalse8
298718 Hydroxylase deficiency18 Hydroxylase deficiencyMONDO:0008751Corticosterone methyloxidase type 1 deficiencylabelhasRelatedSynonym18 Hydroxylase deficiency18-Hydroxylase Deficiency32.01.000000...0.3555560.2329960.2894820.2835820.193941TrueTrueFalseFalse6
298618 Hydroxylase deficiency18 Hydroxylase deficiencyMONDO:0020489familial hyperreninemic hypoaldosteronism type 1labelhasExactSynonym18 Hydroxylase deficiency18-hydroxylase deficiency58.01.000000...0.6444440.2920460.2101450.3091670.188643TrueTrueFalseFalse6
19601q21.1 microdeletion syndrome1q21.1 microdeletion syndromeMONDO:0012914chromosome 1q21.1 deletion syndromelabelhasExactSynonym1q21.1 microdeletion syndrome1q21.1 microdeletion syndrome90.01.000000...1.0000000.0301090.0301090.9230420.016740TrueTrueFalseFalse6
14282 4-Dienoyl-CoA reductase deficiency2 4-Dienoyl-CoA reductase deficiencyMONDO:0014464progressive encephalopathy with leukodystrophy...labelhasExactSynonym2 4-Dienoyl-CoA reductase deficiency2,4-dienoyl-CoA reductase deficiency58.01.000000...1.0000000.2008030.2008030.3825590.215835TrueTrueFalseFalse5
45142-Hydroxyglutaric aciduria2-Hydroxyglutaric aciduriaMONDO:00160012-hydroxyglutaric acidurialabellabel2-Hydroxyglutaric aciduria2-hydroxyglutaric aciduria100.01.000000...1.0000000.0287580.0287580.9259630.016522TrueTrueFalseFalse7
18882-methyl-3-hydroxybutyric aciduria2-methyl-3-hydroxybutyric aciduriaMONDO:0010327HSD10 diseaselabelhasExactSynonym2-methyl-3-hydroxybutyric aciduria2-methyl-3-hydroxybutyric aciduria90.01.000000...1.0000000.0299690.0299690.9187630.021299TrueTrueFalseFalse5
12022-methylbutyryl-CoA dehydrogenase deficiency2-methylbutyryl-CoA dehydrogenase deficiencyMONDO:00123922-methylbutyryl-CoA dehydrogenase deficiencylabellabel2-methylbutyryl-CoA dehydrogenase deficiency2-methylbutyryl-CoA dehydrogenase deficiency100.01.000000...1.0000000.0287950.0287950.9271690.015241TrueTrueFalseFalse7
328821-hydroxylase deficiency21-hydroxylase deficiencyMONDO:0008728classic congenital adrenal hyperplasia due to ...labelhasRelatedSynonym21-hydroxylase deficiency21-Hydroxylase Deficiency50.01.000000...1.0000000.0615810.0615810.7996540.077184TrueTrueFalseFalse5
350722q11.2 deletion syndrome22q11.2 deletion syndromeMONDO:0008644velocardiofacial syndromelabelhasExactSynonym22q11.2 deletion syndromedeletion 22q11.2 syndrome58.01.000000...0.1347540.1794720.2879380.2820700.250520TrueTrueFalseFalse41
296422q11.2 deletion syndrome22q11.2 deletion syndromeMONDO:001892322q11.2 deletion syndromelabellabel22q11.2 deletion syndrome22q11.2 deletion syndrome100.00.166667...0.1150750.0922230.0359540.8417160.030107TrueTrueFalseFalse41
17213 methylglutaconic aciduria type I3 methylglutaconic aciduria type IMONDO:00096103-methylglutaconic aciduria type 1labellabel3 methylglutaconic aciduria type I3-methylglutaconic aciduria type 164.01.000000...1.0000000.2008030.2008030.3825590.215835TrueTrueFalseFalse9
17203 methylglutaconic aciduria type IV3 methylglutaconic aciduria type IVMONDO:00096113-methylglutaconic aciduria type 4labellabel3 methylglutaconic aciduria type IV3-methylglutaconic aciduria type 464.01.000000...1.0000000.2008030.2008030.3825590.215835TrueTrueFalseFalse8
25803 methylglutaconic aciduria type V3 methylglutaconic aciduria type VMONDO:00124353-methylglutaconic aciduria type 5labellabel3 methylglutaconic aciduria type V3-methylglutaconic aciduria type 564.01.000000...1.0000000.1983420.1983420.3778720.225444TrueTrueFalseFalse7
18773-Hydroxyisobutyric aciduria3-Hydroxyisobutyric aciduriaMONDO:00093713-hydroxyisobutyric acidurialabellabel3-Hydroxyisobutyric aciduria3-hydroxyisobutyric aciduria100.01.000000...1.0000000.0287950.0287950.9271690.015241TrueTrueFalseFalse8
32893-beta-hydroxysteroid dehydrogenase deficiency3-beta-hydroxysteroid dehydrogenase deficiencyMONDO:0008727congenital adrenal hyperplasia due to 3-beta-h...labelhasRelatedSynonym3-beta-hydroxysteroid dehydrogenase deficiency3-Beta-Hydroxysteroid Dehydrogenase Deficiency50.01.000000...1.0000000.0615810.0615810.7996540.077184TrueTrueFalseFalse5
36703-methylglutaconic aciduria type III3-methylglutaconic aciduria type IIIMONDO:00097873-methylglutaconic aciduria type 3labelhasExactSynonym3-methylglutaconic aciduria type III3-methylglutaconic aciduria type III90.01.000000...1.0000000.0299690.0299690.9187630.021299TrueTrueFalseFalse8
7554-hydroxyphenylacetic aciduria4-hydroxyphenylacetic aciduriaHP:00036074-Hydroxyphenylacetic acidurialabellabel4-hydroxyphenylacetic aciduria4-Hydroxyphenylacetic aciduria100.01.000000...1.0000000.0288910.0288910.9302680.011949TrueTrueFalseFalse2
368046 XX testicular disorder of sex development46 XX testicular disorder of sex developmentMONDO:001076646,XX testicular disorder of sex developmentlabellabel46 XX testicular disorder of sex development46,XX testicular disorder of sex development64.01.000000...1.0000000.1983420.1983420.3778720.225444TrueTrueFalseFalse6
313647 XXX syndrome47 XXX syndromeMONDO:0018066trisomy XlabelhasExactSynonym47 XXX syndrome47,XXX syndrome58.01.000000...1.0000000.2264930.1854370.3923940.195675TrueTrueFalseFalse5
316647 XYY syndrome47 XYY syndromeMONDO:001933947,XYY syndromelabellabel47 XYY syndrome47,XYY syndrome64.01.000000...1.0000000.2264930.1854370.3923940.195675TrueTrueFalseFalse5
416449 XXXXX syndrome49 XXXXX syndromeMONDO:0015228pentasomy XlabelhasExactSynonym49 XXXXX syndrome49,XXXXX syndrome58.01.000000...1.0000000.2059650.2059650.3923940.195675TrueTrueFalseFalse5
453149 XXXXY syndrome49 XXXXY syndromeMONDO:001992949,XXXXY syndromelabellabel49 XXXXY syndrome49,XXXXY syndrome64.01.000000...1.0000000.2190010.1793030.3794140.222282TrueTrueFalseFalse6
7105-oxoprolinase deficiency5-oxoprolinase deficiencyMONDO:00098255-oxoprolinase deficiency (disease)labelhasExactSynonym5-oxoprolinase deficiency5-oxoprolinase deficiency90.01.000000...1.0000000.0301090.0301090.9230420.016740TrueTrueFalseFalse7
7095-oxoprolinase deficiency5-oxoprolinase deficiencyHP:00401425-oxoprolinase deficiencylabellabel5-oxoprolinase deficiency5-oxoprolinase deficiency100.01.000000...1.0000000.0288910.0288910.9302680.011949TrueTrueFalseFalse7
..................................................................
2067Wrinkly skin syndromeWrinkly skin syndromeMONDO:0010208Wrinkly skin syndromelabellabelWrinkly skin syndromeWrinkly skin syndrome100.01.000000...1.0000000.0287580.0287580.9259630.016522TrueTrueFalseFalse7
2105X-linked adrenal hypoplasia congenitaX-linked adrenal hypoplasia congenitaMONDO:0010264X-linked adrenal hypoplasia congenitalabellabelX-linked adrenal hypoplasia congenitaX-linked adrenal hypoplasia congenita100.01.000000...1.0000000.0287380.0287380.9253230.017201TrueTrueFalseFalse7
1552X-linked hypohidrotic ectodermal dysplasiaX-linked hypohidrotic ectodermal dysplasiaMONDO:0010585X-linked hypohidrotic ectodermal dysplasialabellabelX-linked hypohidrotic ectodermal dysplasiaX-linked hypohidrotic ectodermal dysplasia100.01.000000...1.0000000.0287380.0287380.9253230.017201TrueTrueFalseFalse4
3900X-linked ichthyosisX-linked ichthyosisMONDO:0010622recessive X-linked ichthyosislabelhasExactSynonymX-linked ichthyosisX-linked ichthyosis90.01.000000...1.0000000.0298860.0298860.9162240.024003TrueTrueFalseFalse7
1968X-linked severe combined immunodeficiencyX-linked severe combined immunodeficiencyMONDO:0010315gamma chain deficiencylabelhasExactSynonymX-linked severe combined immunodeficiencyX-Linked Severe Combined Immunodeficiency90.01.000000...1.0000000.0299690.0299690.9187630.021299TrueTrueFalseFalse8
2543XFE progeroid syndromeXFE progeroid syndromeMONDO:0012590XFE progeroid syndromelabellabelXFE progeroid syndromeXFE progeroid syndrome100.01.000000...1.0000000.0288910.0288910.9302680.011949TrueTrueFalseFalse7
3037XK aprosencephalyXK aprosencephalyMONDO:0008811XK aprosencephalylabellabelXK aprosencephalyXK aprosencephaly100.01.000000...1.0000000.0288910.0288910.9302680.011949TrueTrueFalseFalse8
2070Xanthinuria type 1Xanthinuria type 1MONDO:0010209xanthinuria type IlabellabelXanthinuria type 1xanthinuria type I64.01.000000...1.0000000.2059650.2059650.3923940.195675TrueTrueFalseFalse5
2414Xanthinuria type 2Xanthinuria type 2MONDO:0011346xanthinuria type IIlabellabelXanthinuria type 2xanthinuria type II64.01.000000...1.0000000.2059650.2059650.3923940.195675TrueTrueFalseFalse6
1509Xanthogranulomatous cholecystitisXanthogranulomatous cholecystitisMONDO:0004875xanthogranulomatous cholecystitislabellabelXanthogranulomatous cholecystitisxanthogranulomatous cholecystitis100.01.000000...1.0000000.0287950.0287950.9271690.015241TrueTrueFalseFalse8
2867Xeroderma pigmentosumXeroderma pigmentosumMONDO:0019600xeroderma pigmentosumlabellabelXeroderma pigmentosumxeroderma pigmentosum100.01.000000...1.0000000.0287580.0287580.9259630.016522TrueTrueFalseFalse8
2077Xeroderma pigmentosum variant typeXeroderma pigmentosum variant typeMONDO:0010214xeroderma pigmentosum variant typelabellabelXeroderma pigmentosum variant typexeroderma pigmentosum variant type100.01.000000...1.0000000.0287580.0287580.9259630.016522TrueTrueFalseFalse8
3151YawsYawsMONDO:0006019yawslabellabelYawsyaws100.01.000000...1.0000000.0518300.0518300.8745310.021809TrueTrueFalseFalse10
3080Yellow feverYellow feverMONDO:0020502yellow feverlabellabelYellow feveryellow fever100.01.000000...1.0000000.0288910.0288910.9302680.011949TrueTrueFalseFalse8
4539Yellow nail syndromeYellow nail syndromeMONDO:0007921yellow nail syndromelabellabelYellow nail syndromeyellow nail syndrome100.01.000000...1.0000000.0288910.0288910.9302680.011949TrueTrueFalseFalse9
2555Yemenite deaf-blind hypopigmentation syndromeYemenite deaf-blind hypopigmentation syndromeMONDO:0011133Deaf blind hypopigmentation syndrome, Yemenite...labelhasExactSynonymYemenite deaf-blind hypopigmentation syndromeYemenite deaf-blind hypopigmentation syndrome90.01.000000...1.0000000.0301090.0301090.9230420.016740TrueTrueFalseFalse6
4262Yolk sac tumorYolk sac tumorMONDO:0005744yolk sac tumorlabellabelYolk sac tumoryolk sac tumor100.01.000000...1.0000000.0287580.0287580.9259630.016522TrueTrueFalseFalse7
3775Yorifuji Okuno syndromeYorifuji Okuno syndromeMONDO:0010802pancreatic hypoplasia-diabetes-congenital hear...labelhasExactSynonymYorifuji Okuno syndromeYorifuji-Okuno syndrome58.01.000000...1.0000000.2059650.2059650.3923940.195675TrueTrueFalseFalse5
4330Young Hughes syndromeYoung Hughes syndromeMONDO:0017614X-linked intellectual disability-hypogonadism-...labelhasExactSynonymYoung Hughes syndromeYoung-Hughes syndrome58.01.000000...1.0000000.2008030.2008030.3825590.215835TrueTrueFalseFalse4
2384Young Simpson syndromeYoung Simpson syndromeMONDO:0011365blepharophimosis-intellectual disability syndr...labelhasRelatedSynonymYoung Simpson syndromeYoung-Simpson Syndrome32.01.000000...1.0000000.2008030.2008030.3825590.215835TrueTrueFalseFalse7
2059Young syndromeYoung syndromeMONDO:0010220young syndromelabellabelYoung syndromeyoung syndrome100.01.000000...1.0000000.0288910.0288910.9302680.011949TrueTrueFalseFalse7
2892Yunis Varon syndromeYunis Varon syndromeMONDO:0008995Yunis-Varon syndromelabellabelYunis Varon syndromeYunis-Varon syndrome64.01.000000...1.0000000.0629220.0629220.8170660.057090TrueTrueFalseFalse7
1473Zechi Ceide syndromeZechi Ceide syndromeMONDO:0013036Zechi-Ceide syndromelabellabelZechi Ceide syndromeZechi-Ceide syndrome64.01.000000...1.0000000.2059650.2059650.3923940.195675TrueTrueFalseFalse6
2866Zellweger syndromeZellweger syndromeMONDO:0019609Zellweger syndromelabellabelZellweger syndromeZellweger syndrome100.01.000000...1.0000000.0287580.0287580.9259630.016522TrueTrueFalseFalse6
657Zollinger-Ellison syndromeZollinger-Ellison syndromeMONDO:0006020Zollinger-Ellison syndrome (disease)labelhasExactSynonymZollinger-Ellison syndromeZollinger-Ellison Syndrome90.01.000000...0.4736840.0752510.0621850.8390620.023503TrueTrueFalseFalse11
655Zollinger-Ellison syndromeZollinger-Ellison syndromeHP:0002044Zollinger-Ellison syndromelabellabelZollinger-Ellison syndromeZollinger-Ellison syndrome100.01.000000...1.0000000.0288910.0288910.9302680.011949TrueTrueFalseFalse11
656Zollinger-Ellison syndromeZollinger-Ellison syndromeMONDO:0019610Zollinger-Ellison syndromelabellabelZollinger-Ellison syndromeZollinger-Ellison syndrome100.01.000000...0.5263160.0552950.0456940.8825700.016441TrueTrueFalseFalse11
3377Zori Stalker Williams syndromeZori Stalker Williams syndromeMONDO:0010883pectus excavatum-macrocephaly-dysplastic nails...labelhasExactSynonymZori Stalker Williams syndromeZori-Stalker-Williams syndrome58.01.000000...1.0000000.2059650.2059650.3923940.195675TrueTrueFalseFalse5
2061Zunich neuroectodermal syndromeZunich neuroectodermal syndromeMONDO:0010221CHIME syndromelabelhasRelatedSynonymZunich neuroectodermal syndromeZunich Neuroectodermal Syndrome50.01.000000...1.0000000.0619510.0619510.8044540.071645TrueTrueFalseFalse6
3649ZygomycosisZygomycosisMONDO:0019136zygomycosislabellabelZygomycosiszygomycosis100.01.000000...1.0000000.0518300.0518300.8745310.021809TrueTrueFalseFalse9
\n", "

4558 rows × 22 columns

\n", "
" ], "text/plain": [ " left \\\n", "3287 11-beta-hydroxylase deficiency \n", "2199 15q13.3 microdeletion syndrome \n", "3339 17-alpha-hydroxylase deficiency \n", "3481 17-beta hydroxysteroid dehydrogenase 3 deficiency \n", "2592 17q21.31 microdeletion syndrome \n", "2593 17q21.31 microdeletion syndrome \n", "2987 18 Hydroxylase deficiency \n", "2986 18 Hydroxylase deficiency \n", "1960 1q21.1 microdeletion syndrome \n", "1428 2 4-Dienoyl-CoA reductase deficiency \n", "4514 2-Hydroxyglutaric aciduria \n", "1888 2-methyl-3-hydroxybutyric aciduria \n", "1202 2-methylbutyryl-CoA dehydrogenase deficiency \n", "3288 21-hydroxylase deficiency \n", "3507 22q11.2 deletion syndrome \n", "2964 22q11.2 deletion syndrome \n", "1721 3 methylglutaconic aciduria type I \n", "1720 3 methylglutaconic aciduria type IV \n", "2580 3 methylglutaconic aciduria type V \n", "1877 3-Hydroxyisobutyric aciduria \n", "3289 3-beta-hydroxysteroid dehydrogenase deficiency \n", "3670 3-methylglutaconic aciduria type III \n", "755 4-hydroxyphenylacetic aciduria \n", "3680 46 XX testicular disorder of sex development \n", "3136 47 XXX syndrome \n", "3166 47 XYY syndrome \n", "4164 49 XXXXX syndrome \n", "4531 49 XXXXY syndrome \n", "710 5-oxoprolinase deficiency \n", "709 5-oxoprolinase deficiency \n", "... ... 
\n", "2067 Wrinkly skin syndrome \n", "2105 X-linked adrenal hypoplasia congenita \n", "1552 X-linked hypohidrotic ectodermal dysplasia \n", "3900 X-linked ichthyosis \n", "1968 X-linked severe combined immunodeficiency \n", "2543 XFE progeroid syndrome \n", "3037 XK aprosencephaly \n", "2070 Xanthinuria type 1 \n", "2414 Xanthinuria type 2 \n", "1509 Xanthogranulomatous cholecystitis \n", "2867 Xeroderma pigmentosum \n", "2077 Xeroderma pigmentosum variant type \n", "3151 Yaws \n", "3080 Yellow fever \n", "4539 Yellow nail syndrome \n", "2555 Yemenite deaf-blind hypopigmentation syndrome \n", "4262 Yolk sac tumor \n", "3775 Yorifuji Okuno syndrome \n", "4330 Young Hughes syndrome \n", "2384 Young Simpson syndrome \n", "2059 Young syndrome \n", "2892 Yunis Varon syndrome \n", "1473 Zechi Ceide syndrome \n", "2866 Zellweger syndrome \n", "657 Zollinger-Ellison syndrome \n", "655 Zollinger-Ellison syndrome \n", "656 Zollinger-Ellison syndrome \n", "3377 Zori Stalker Williams syndrome \n", "2061 Zunich neuroectodermal syndrome \n", "3649 Zygomycosis \n", "\n", " left_label right \\\n", "3287 11-beta-hydroxylase deficiency MONDO:0008729 \n", "2199 15q13.3 microdeletion syndrome MONDO:0012774 \n", "3339 17-alpha-hydroxylase deficiency MONDO:0008730 \n", "3481 17-beta hydroxysteroid dehydrogenase 3 deficiency MONDO:0009916 \n", "2592 17q21.31 microdeletion syndrome MONDO:0012496 \n", "2593 17q21.31 microdeletion syndrome MONDO:0018216 \n", "2987 18 Hydroxylase deficiency MONDO:0008751 \n", "2986 18 Hydroxylase deficiency MONDO:0020489 \n", "1960 1q21.1 microdeletion syndrome MONDO:0012914 \n", "1428 2 4-Dienoyl-CoA reductase deficiency MONDO:0014464 \n", "4514 2-Hydroxyglutaric aciduria MONDO:0016001 \n", "1888 2-methyl-3-hydroxybutyric aciduria MONDO:0010327 \n", "1202 2-methylbutyryl-CoA dehydrogenase deficiency MONDO:0012392 \n", "3288 21-hydroxylase deficiency MONDO:0008728 \n", "3507 22q11.2 deletion syndrome MONDO:0008644 \n", "2964 22q11.2 deletion syndrome 
MONDO:0018923 \n", "1721 3 methylglutaconic aciduria type I MONDO:0009610 \n", "1720 3 methylglutaconic aciduria type IV MONDO:0009611 \n", "2580 3 methylglutaconic aciduria type V MONDO:0012435 \n", "1877 3-Hydroxyisobutyric aciduria MONDO:0009371 \n", "3289 3-beta-hydroxysteroid dehydrogenase deficiency MONDO:0008727 \n", "3670 3-methylglutaconic aciduria type III MONDO:0009787 \n", "755 4-hydroxyphenylacetic aciduria HP:0003607 \n", "3680 46 XX testicular disorder of sex development MONDO:0010766 \n", "3136 47 XXX syndrome MONDO:0018066 \n", "3166 47 XYY syndrome MONDO:0019339 \n", "4164 49 XXXXX syndrome MONDO:0015228 \n", "4531 49 XXXXY syndrome MONDO:0019929 \n", "710 5-oxoprolinase deficiency MONDO:0009825 \n", "709 5-oxoprolinase deficiency HP:0040142 \n", "... ... ... \n", "2067 Wrinkly skin syndrome MONDO:0010208 \n", "2105 X-linked adrenal hypoplasia congenita MONDO:0010264 \n", "1552 X-linked hypohidrotic ectodermal dysplasia MONDO:0010585 \n", "3900 X-linked ichthyosis MONDO:0010622 \n", "1968 X-linked severe combined immunodeficiency MONDO:0010315 \n", "2543 XFE progeroid syndrome MONDO:0012590 \n", "3037 XK aprosencephaly MONDO:0008811 \n", "2070 Xanthinuria type 1 MONDO:0010209 \n", "2414 Xanthinuria type 2 MONDO:0011346 \n", "1509 Xanthogranulomatous cholecystitis MONDO:0004875 \n", "2867 Xeroderma pigmentosum MONDO:0019600 \n", "2077 Xeroderma pigmentosum variant type MONDO:0010214 \n", "3151 Yaws MONDO:0006019 \n", "3080 Yellow fever MONDO:0020502 \n", "4539 Yellow nail syndrome MONDO:0007921 \n", "2555 Yemenite deaf-blind hypopigmentation syndrome MONDO:0011133 \n", "4262 Yolk sac tumor MONDO:0005744 \n", "3775 Yorifuji Okuno syndrome MONDO:0010802 \n", "4330 Young Hughes syndrome MONDO:0017614 \n", "2384 Young Simpson syndrome MONDO:0011365 \n", "2059 Young syndrome MONDO:0010220 \n", "2892 Yunis Varon syndrome MONDO:0008995 \n", "1473 Zechi Ceide syndrome MONDO:0013036 \n", "2866 Zellweger syndrome MONDO:0019609 \n", "657 Zollinger-Ellison 
syndrome MONDO:0006020 \n", "655 Zollinger-Ellison syndrome HP:0002044 \n", "656 Zollinger-Ellison syndrome MONDO:0019610 \n", "3377 Zori Stalker Williams syndrome MONDO:0010883 \n", "2061 Zunich neuroectodermal syndrome MONDO:0010221 \n", "3649 Zygomycosis MONDO:0019136 \n", "\n", " right_label left_match_type \\\n", "3287 congenital adrenal hyperplasia due to 11-beta-... label \n", "2199 chromosome 15q13.3 microdeletion syndrome label \n", "3339 congenital adrenal hyperplasia due to 17-alpha... label \n", "3481 46,XY disorder of sex development due to 17-be... label \n", "2592 Koolen de Vries syndrome label \n", "2593 17q21.31 microdeletion syndrome label \n", "2987 Corticosterone methyloxidase type 1 deficiency label \n", "2986 familial hyperreninemic hypoaldosteronism type 1 label \n", "1960 chromosome 1q21.1 deletion syndrome label \n", "1428 progressive encephalopathy with leukodystrophy... label \n", "4514 2-hydroxyglutaric aciduria label \n", "1888 HSD10 disease label \n", "1202 2-methylbutyryl-CoA dehydrogenase deficiency label \n", "3288 classic congenital adrenal hyperplasia due to ... label \n", "3507 velocardiofacial syndrome label \n", "2964 22q11.2 deletion syndrome label \n", "1721 3-methylglutaconic aciduria type 1 label \n", "1720 3-methylglutaconic aciduria type 4 label \n", "2580 3-methylglutaconic aciduria type 5 label \n", "1877 3-hydroxyisobutyric aciduria label \n", "3289 congenital adrenal hyperplasia due to 3-beta-h... label \n", "3670 3-methylglutaconic aciduria type 3 label \n", "755 4-Hydroxyphenylacetic aciduria label \n", "3680 46,XX testicular disorder of sex development label \n", "3136 trisomy X label \n", "3166 47,XYY syndrome label \n", "4164 pentasomy X label \n", "4531 49,XXXXY syndrome label \n", "710 5-oxoprolinase deficiency (disease) label \n", "709 5-oxoprolinase deficiency label \n", "... ... ... 
\n", "2067 Wrinkly skin syndrome label \n", "2105 X-linked adrenal hypoplasia congenita label \n", "1552 X-linked hypohidrotic ectodermal dysplasia label \n", "3900 recessive X-linked ichthyosis label \n", "1968 gamma chain deficiency label \n", "2543 XFE progeroid syndrome label \n", "3037 XK aprosencephaly label \n", "2070 xanthinuria type I label \n", "2414 xanthinuria type II label \n", "1509 xanthogranulomatous cholecystitis label \n", "2867 xeroderma pigmentosum label \n", "2077 xeroderma pigmentosum variant type label \n", "3151 yaws label \n", "3080 yellow fever label \n", "4539 yellow nail syndrome label \n", "2555 Deaf blind hypopigmentation syndrome, Yemenite... label \n", "4262 yolk sac tumor label \n", "3775 pancreatic hypoplasia-diabetes-congenital hear... label \n", "4330 X-linked intellectual disability-hypogonadism-... label \n", "2384 blepharophimosis-intellectual disability syndr... label \n", "2059 young syndrome label \n", "2892 Yunis-Varon syndrome label \n", "1473 Zechi-Ceide syndrome label \n", "2866 Zellweger syndrome label \n", "657 Zollinger-Ellison syndrome (disease) label \n", "655 Zollinger-Ellison syndrome label \n", "656 Zollinger-Ellison syndrome label \n", "3377 pectus excavatum-macrocephaly-dysplastic nails... 
label \n", "2061 CHIME syndrome label \n", "3649 zygomycosis label \n", "\n", " right_match_type left_match_val \\\n", "3287 hasRelatedSynonym 11-beta-hydroxylase deficiency \n", "2199 hasExactSynonym 15q13.3 microdeletion syndrome \n", "3339 hasRelatedSynonym 17-alpha-hydroxylase deficiency \n", "3481 hasExactSynonym 17-beta hydroxysteroid dehydrogenase 3 deficiency \n", "2592 hasExactSynonym 17q21.31 microdeletion syndrome \n", "2593 label 17q21.31 microdeletion syndrome \n", "2987 hasRelatedSynonym 18 Hydroxylase deficiency \n", "2986 hasExactSynonym 18 Hydroxylase deficiency \n", "1960 hasExactSynonym 1q21.1 microdeletion syndrome \n", "1428 hasExactSynonym 2 4-Dienoyl-CoA reductase deficiency \n", "4514 label 2-Hydroxyglutaric aciduria \n", "1888 hasExactSynonym 2-methyl-3-hydroxybutyric aciduria \n", "1202 label 2-methylbutyryl-CoA dehydrogenase deficiency \n", "3288 hasRelatedSynonym 21-hydroxylase deficiency \n", "3507 hasExactSynonym 22q11.2 deletion syndrome \n", "2964 label 22q11.2 deletion syndrome \n", "1721 label 3 methylglutaconic aciduria type I \n", "1720 label 3 methylglutaconic aciduria type IV \n", "2580 label 3 methylglutaconic aciduria type V \n", "1877 label 3-Hydroxyisobutyric aciduria \n", "3289 hasRelatedSynonym 3-beta-hydroxysteroid dehydrogenase deficiency \n", "3670 hasExactSynonym 3-methylglutaconic aciduria type III \n", "755 label 4-hydroxyphenylacetic aciduria \n", "3680 label 46 XX testicular disorder of sex development \n", "3136 hasExactSynonym 47 XXX syndrome \n", "3166 label 47 XYY syndrome \n", "4164 hasExactSynonym 49 XXXXX syndrome \n", "4531 label 49 XXXXY syndrome \n", "710 hasExactSynonym 5-oxoprolinase deficiency \n", "709 label 5-oxoprolinase deficiency \n", "... ... ... 
\n", "2067 label Wrinkly skin syndrome \n", "2105 label X-linked adrenal hypoplasia congenita \n", "1552 label X-linked hypohidrotic ectodermal dysplasia \n", "3900 hasExactSynonym X-linked ichthyosis \n", "1968 hasExactSynonym X-linked severe combined immunodeficiency \n", "2543 label XFE progeroid syndrome \n", "3037 label XK aprosencephaly \n", "2070 label Xanthinuria type 1 \n", "2414 label Xanthinuria type 2 \n", "1509 label Xanthogranulomatous cholecystitis \n", "2867 label Xeroderma pigmentosum \n", "2077 label Xeroderma pigmentosum variant type \n", "3151 label Yaws \n", "3080 label Yellow fever \n", "4539 label Yellow nail syndrome \n", "2555 hasExactSynonym Yemenite deaf-blind hypopigmentation syndrome \n", "4262 label Yolk sac tumor \n", "3775 hasExactSynonym Yorifuji Okuno syndrome \n", "4330 hasExactSynonym Young Hughes syndrome \n", "2384 hasRelatedSynonym Young Simpson syndrome \n", "2059 label Young syndrome \n", "2892 label Yunis Varon syndrome \n", "1473 label Zechi Ceide syndrome \n", "2866 label Zellweger syndrome \n", "657 hasExactSynonym Zollinger-Ellison syndrome \n", "655 label Zollinger-Ellison syndrome \n", "656 label Zollinger-Ellison syndrome \n", "3377 hasExactSynonym Zori Stalker Williams syndrome \n", "2061 hasRelatedSynonym Zunich neuroectodermal syndrome \n", "3649 label Zygomycosis \n", "\n", " right_match_val score left_simscore \\\n", "3287 11-Beta-Hydroxylase Deficiency 50.0 1.000000 \n", "2199 15q13.3 microdeletion syndrome 90.0 1.000000 \n", "3339 17-Alpha-Hydroxylase Deficiency 50.0 1.000000 \n", "3481 17-beta-hydroxysteroid dehydrogenase 3 deficiency 58.0 1.000000 \n", "2592 17q21.31 microdeletion syndrome 90.0 1.000000 \n", "2593 17q21.31 microdeletion syndrome 100.0 1.000000 \n", "2987 18-Hydroxylase Deficiency 32.0 1.000000 \n", "2986 18-hydroxylase deficiency 58.0 1.000000 \n", "1960 1q21.1 microdeletion syndrome 90.0 1.000000 \n", "1428 2,4-dienoyl-CoA reductase deficiency 58.0 1.000000 \n", "4514 2-hydroxyglutaric 
aciduria 100.0 1.000000 \n", "1888 2-methyl-3-hydroxybutyric aciduria 90.0 1.000000 \n", "1202 2-methylbutyryl-CoA dehydrogenase deficiency 100.0 1.000000 \n", "3288 21-Hydroxylase Deficiency 50.0 1.000000 \n", "3507 deletion 22q11.2 syndrome 58.0 1.000000 \n", "2964 22q11.2 deletion syndrome 100.0 0.166667 \n", "1721 3-methylglutaconic aciduria type 1 64.0 1.000000 \n", "1720 3-methylglutaconic aciduria type 4 64.0 1.000000 \n", "2580 3-methylglutaconic aciduria type 5 64.0 1.000000 \n", "1877 3-hydroxyisobutyric aciduria 100.0 1.000000 \n", "3289 3-Beta-Hydroxysteroid Dehydrogenase Deficiency 50.0 1.000000 \n", "3670 3-methylglutaconic aciduria type III 90.0 1.000000 \n", "755 4-Hydroxyphenylacetic aciduria 100.0 1.000000 \n", "3680 46,XX testicular disorder of sex development 64.0 1.000000 \n", "3136 47,XXX syndrome 58.0 1.000000 \n", "3166 47,XYY syndrome 64.0 1.000000 \n", "4164 49,XXXXX syndrome 58.0 1.000000 \n", "4531 49,XXXXY syndrome 64.0 1.000000 \n", "710 5-oxoprolinase deficiency 90.0 1.000000 \n", "709 5-oxoprolinase deficiency 100.0 1.000000 \n", "... ... ... ... 
\n", "2067 Wrinkly skin syndrome 100.0 1.000000 \n", "2105 X-linked adrenal hypoplasia congenita 100.0 1.000000 \n", "1552 X-linked hypohidrotic ectodermal dysplasia 100.0 1.000000 \n", "3900 X-linked ichthyosis 90.0 1.000000 \n", "1968 X-Linked Severe Combined Immunodeficiency 90.0 1.000000 \n", "2543 XFE progeroid syndrome 100.0 1.000000 \n", "3037 XK aprosencephaly 100.0 1.000000 \n", "2070 xanthinuria type I 64.0 1.000000 \n", "2414 xanthinuria type II 64.0 1.000000 \n", "1509 xanthogranulomatous cholecystitis 100.0 1.000000 \n", "2867 xeroderma pigmentosum 100.0 1.000000 \n", "2077 xeroderma pigmentosum variant type 100.0 1.000000 \n", "3151 yaws 100.0 1.000000 \n", "3080 yellow fever 100.0 1.000000 \n", "4539 yellow nail syndrome 100.0 1.000000 \n", "2555 Yemenite deaf-blind hypopigmentation syndrome 90.0 1.000000 \n", "4262 yolk sac tumor 100.0 1.000000 \n", "3775 Yorifuji-Okuno syndrome 58.0 1.000000 \n", "4330 Young-Hughes syndrome 58.0 1.000000 \n", "2384 Young-Simpson Syndrome 32.0 1.000000 \n", "2059 young syndrome 100.0 1.000000 \n", "2892 Yunis-Varon syndrome 64.0 1.000000 \n", "1473 Zechi-Ceide syndrome 64.0 1.000000 \n", "2866 Zellweger syndrome 100.0 1.000000 \n", "657 Zollinger-Ellison Syndrome 90.0 1.000000 \n", "655 Zollinger-Ellison syndrome 100.0 1.000000 \n", "656 Zollinger-Ellison syndrome 100.0 1.000000 \n", "3377 Zori-Stalker-Williams syndrome 58.0 1.000000 \n", "2061 Zunich Neuroectodermal Syndrome 50.0 1.000000 \n", "3649 zygomycosis 100.0 1.000000 \n", "\n", " ... conditional_pr_equiv pr_subClassOf pr_superClassOf \\\n", "3287 ... 1.000000 0.061581 0.061581 \n", "2199 ... 1.000000 0.029969 0.029969 \n", "3339 ... 1.000000 0.061581 0.061581 \n", "3481 ... 1.000000 0.205965 0.205965 \n", "2592 ... 0.473684 0.168017 0.055554 \n", "2593 ... 0.526316 0.051671 0.108232 \n", "2987 ... 0.355556 0.232996 0.289482 \n", "2986 ... 0.644444 0.292046 0.210145 \n", "1960 ... 1.000000 0.030109 0.030109 \n", "1428 ... 
1.000000 0.200803 0.200803 \n", "4514 ... 1.000000 0.028758 0.028758 \n", "1888 ... 1.000000 0.029969 0.029969 \n", "1202 ... 1.000000 0.028795 0.028795 \n", "3288 ... 1.000000 0.061581 0.061581 \n", "3507 ... 0.134754 0.179472 0.287938 \n", "2964 ... 0.115075 0.092223 0.035954 \n", "1721 ... 1.000000 0.200803 0.200803 \n", "1720 ... 1.000000 0.200803 0.200803 \n", "2580 ... 1.000000 0.198342 0.198342 \n", "1877 ... 1.000000 0.028795 0.028795 \n", "3289 ... 1.000000 0.061581 0.061581 \n", "3670 ... 1.000000 0.029969 0.029969 \n", "755 ... 1.000000 0.028891 0.028891 \n", "3680 ... 1.000000 0.198342 0.198342 \n", "3136 ... 1.000000 0.226493 0.185437 \n", "3166 ... 1.000000 0.226493 0.185437 \n", "4164 ... 1.000000 0.205965 0.205965 \n", "4531 ... 1.000000 0.219001 0.179303 \n", "710 ... 1.000000 0.030109 0.030109 \n", "709 ... 1.000000 0.028891 0.028891 \n", "... ... ... ... ... \n", "2067 ... 1.000000 0.028758 0.028758 \n", "2105 ... 1.000000 0.028738 0.028738 \n", "1552 ... 1.000000 0.028738 0.028738 \n", "3900 ... 1.000000 0.029886 0.029886 \n", "1968 ... 1.000000 0.029969 0.029969 \n", "2543 ... 1.000000 0.028891 0.028891 \n", "3037 ... 1.000000 0.028891 0.028891 \n", "2070 ... 1.000000 0.205965 0.205965 \n", "2414 ... 1.000000 0.205965 0.205965 \n", "1509 ... 1.000000 0.028795 0.028795 \n", "2867 ... 1.000000 0.028758 0.028758 \n", "2077 ... 1.000000 0.028758 0.028758 \n", "3151 ... 1.000000 0.051830 0.051830 \n", "3080 ... 1.000000 0.028891 0.028891 \n", "4539 ... 1.000000 0.028891 0.028891 \n", "2555 ... 1.000000 0.030109 0.030109 \n", "4262 ... 1.000000 0.028758 0.028758 \n", "3775 ... 1.000000 0.205965 0.205965 \n", "4330 ... 1.000000 0.200803 0.200803 \n", "2384 ... 1.000000 0.200803 0.200803 \n", "2059 ... 1.000000 0.028891 0.028891 \n", "2892 ... 1.000000 0.062922 0.062922 \n", "1473 ... 1.000000 0.205965 0.205965 \n", "2866 ... 1.000000 0.028758 0.028758 \n", "657 ... 0.473684 0.075251 0.062185 \n", "655 ... 1.000000 0.028891 0.028891 \n", "656 ... 
0.526316 0.055295 0.045694 \n", "3377 ... 1.000000 0.205965 0.205965 \n", "2061 ... 1.000000 0.061951 0.061951 \n", "3649 ... 1.000000 0.051830 0.051830 \n", "\n", " pr_equivalentTo pr_other left_novel right_novel left_consistent \\\n", "3287 0.799654 0.077184 True True False \n", "2199 0.918763 0.021299 True True False \n", "3339 0.799654 0.077184 True True False \n", "3481 0.392394 0.195675 True True False \n", "2592 0.749591 0.026839 True True False \n", "2593 0.824734 0.015363 True True False \n", "2987 0.283582 0.193941 True True False \n", "2986 0.309167 0.188643 True True False \n", "1960 0.923042 0.016740 True True False \n", "1428 0.382559 0.215835 True True False \n", "4514 0.925963 0.016522 True True False \n", "1888 0.918763 0.021299 True True False \n", "1202 0.927169 0.015241 True True False \n", "3288 0.799654 0.077184 True True False \n", "3507 0.282070 0.250520 True True False \n", "2964 0.841716 0.030107 True True False \n", "1721 0.382559 0.215835 True True False \n", "1720 0.382559 0.215835 True True False \n", "2580 0.377872 0.225444 True True False \n", "1877 0.927169 0.015241 True True False \n", "3289 0.799654 0.077184 True True False \n", "3670 0.918763 0.021299 True True False \n", "755 0.930268 0.011949 True True False \n", "3680 0.377872 0.225444 True True False \n", "3136 0.392394 0.195675 True True False \n", "3166 0.392394 0.195675 True True False \n", "4164 0.392394 0.195675 True True False \n", "4531 0.379414 0.222282 True True False \n", "710 0.923042 0.016740 True True False \n", "709 0.930268 0.011949 True True False \n", "... ... ... ... ... ... 
\n", "2067 0.925963 0.016522 True True False \n", "2105 0.925323 0.017201 True True False \n", "1552 0.925323 0.017201 True True False \n", "3900 0.916224 0.024003 True True False \n", "1968 0.918763 0.021299 True True False \n", "2543 0.930268 0.011949 True True False \n", "3037 0.930268 0.011949 True True False \n", "2070 0.392394 0.195675 True True False \n", "2414 0.392394 0.195675 True True False \n", "1509 0.927169 0.015241 True True False \n", "2867 0.925963 0.016522 True True False \n", "2077 0.925963 0.016522 True True False \n", "3151 0.874531 0.021809 True True False \n", "3080 0.930268 0.011949 True True False \n", "4539 0.930268 0.011949 True True False \n", "2555 0.923042 0.016740 True True False \n", "4262 0.925963 0.016522 True True False \n", "3775 0.392394 0.195675 True True False \n", "4330 0.382559 0.215835 True True False \n", "2384 0.382559 0.215835 True True False \n", "2059 0.930268 0.011949 True True False \n", "2892 0.817066 0.057090 True True False \n", "1473 0.392394 0.195675 True True False \n", "2866 0.925963 0.016522 True True False \n", "657 0.839062 0.023503 True True False \n", "655 0.930268 0.011949 True True False \n", "656 0.882570 0.016441 True True False \n", "3377 0.392394 0.195675 True True False \n", "2061 0.804454 0.071645 True True False \n", "3649 0.874531 0.021809 True True False \n", "\n", " right_consistent equiv_clique_size \n", "3287 False 7 \n", "2199 False 6 \n", "3339 False 5 \n", "3481 False 7 \n", "2592 False 8 \n", "2593 False 8 \n", "2987 False 6 \n", "2986 False 6 \n", "1960 False 6 \n", "1428 False 5 \n", "4514 False 7 \n", "1888 False 5 \n", "1202 False 7 \n", "3288 False 5 \n", "3507 False 41 \n", "2964 False 41 \n", "1721 False 9 \n", "1720 False 8 \n", "2580 False 7 \n", "1877 False 8 \n", "3289 False 5 \n", "3670 False 8 \n", "755 False 2 \n", "3680 False 6 \n", "3136 False 5 \n", "3166 False 5 \n", "4164 False 5 \n", "4531 False 6 \n", "710 False 7 \n", "709 False 7 \n", "... ... ... 
\n", "2067 False 7 \n", "2105 False 7 \n", "1552 False 4 \n", "3900 False 7 \n", "1968 False 8 \n", "2543 False 7 \n", "3037 False 8 \n", "2070 False 5 \n", "2414 False 6 \n", "1509 False 8 \n", "2867 False 8 \n", "2077 False 8 \n", "3151 False 10 \n", "3080 False 8 \n", "4539 False 9 \n", "2555 False 6 \n", "4262 False 7 \n", "3775 False 5 \n", "4330 False 4 \n", "2384 False 7 \n", "2059 False 7 \n", "2892 False 7 \n", "1473 False 6 \n", "2866 False 6 \n", "657 False 11 \n", "655 False 11 \n", "656 False 11 \n", "3377 False 5 \n", "2061 False 6 \n", "3649 False 9 \n", "\n", "[4558 rows x 22 columns]" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# get a dataframe from the mapping graph\n", "df=lexmap.as_dataframe(g)\n", "df" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "## write to file (not used here but can be examined separately)\n", "df.to_csv('rare-matches.tsv', sep=\"\\t\", index=False)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "udf = lexmap.unmapped_dataframe(g)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idlabelmapped_equivs
1805716p11.2 deletion syndrome16p11.2 deletion syndrome
1056462-Methylacetoacetyl CoA thiolase deficiency2-Methylacetoacetyl CoA thiolase deficiency
419052-hydroxyethyl methacrylate sensitization2-hydroxyethyl methacrylate sensitization
2913322q11.2 duplication syndrome22q11.2 duplication syndrome
10042822q13.3 deletion syndrome22q13.3 deletion syndrome
961222q37 deletion syndrome2q37 deletion syndrome
884823 Methylcrotonyl-CoA carboxylase 1 deficiency3 Methylcrotonyl-CoA carboxylase 1 deficiency
345013 alpha methylcrotonyl-CoA carboxylase 2 defic...3 alpha methylcrotonyl-CoA carboxylase 2 defic...
856703-alpha hydroxyacyl-CoA dehydrogenase deficiency3-alpha hydroxyacyl-CoA dehydrogenase deficiency
779293p deletion syndrome3p deletion syndrome
9509546 XX Gonadal dysgenesis epibulbar dermoid46 XX Gonadal dysgenesis epibulbar dermoid
900325-Nucleotidase syndrome5-Nucleotidase syndrome
743746 alpha mercaptopurine sensitivity6 alpha mercaptopurine sensitivity
51486ACTH-independent macronodular adrenal hyperplasiaACTH-independent macronodular adrenal hyperplasia
26334AIDS Dementia ComplexAIDS Dementia Complex
12881AIDS dysmorphic syndromeAIDS dysmorphic syndrome
77859ALK+ histiocytosisALK+ histiocytosis
26275ALS-like syndrome of encephalomyopathyALS-like syndrome of encephalomyopathy
60831Abderhalden Kaufmann Lignac syndromeAbderhalden Kaufmann Lignac syndrome
10975Abdominal chemodectomas with cutaneous angioli...Abdominal chemodectomas with cutaneous angioli...
108580Abdominal cystic lymphangiomaAbdominal cystic lymphangioma
94496Aberrant subclavian arteryAberrant subclavian artery
93116Abidi X-linked mental retardation syndromeAbidi X-linked mental retardation syndrome
40555Absence of fingerprints congenital miliaAbsence of fingerprints congenital milia
7562Absence of gluteal muscleAbsence of gluteal muscle
6259Absence of tibia with polydactylyAbsence of tibia with polydactyly
75137Absent T lymphocytesAbsent T lymphocytes
49480Absent breasts and nipplesAbsent breasts and nipples
56640Abuse dwarfism syndromeAbuse dwarfism syndrome
8138Acanthamoeba infectionAcanthamoeba infection
............
25120http://www.orpha.net/ORDO/Orphanet_99948None[MONDO:0008961]
21037http://www.orpha.net/ORDO/Orphanet_99949None[MONDO:0011113]
17454http://www.orpha.net/ORDO/Orphanet_99950None[MONDO:0011085]
20779http://www.orpha.net/ORDO/Orphanet_99951None[MONDO:0011527]
70495http://www.orpha.net/ORDO/Orphanet_99952None[]
94070http://www.orpha.net/ORDO/Orphanet_99953None[MONDO:0011534]
82089http://www.orpha.net/ORDO/Orphanet_99954None[]
106842http://www.orpha.net/ORDO/Orphanet_99955None[MONDO:0011066]
99610http://www.orpha.net/ORDO/Orphanet_99956None[MONDO:0011475]
10190http://www.orpha.net/ORDO/Orphanet_99960None[]
100192http://www.orpha.net/ORDO/Orphanet_99961None[]
30752http://www.orpha.net/ORDO/Orphanet_99965None[]
51288http://www.orpha.net/ORDO/Orphanet_99966None[]
24502http://www.orpha.net/ORDO/Orphanet_99967None[]
102919http://www.orpha.net/ORDO/Orphanet_99969None[]
79112http://www.orpha.net/ORDO/Orphanet_99970None[]
17426http://www.orpha.net/ORDO/Orphanet_99971None[]
15059http://www.orpha.net/ORDO/Orphanet_99976None[]
65846http://www.orpha.net/ORDO/Orphanet_99977None[]
6381http://www.orpha.net/ORDO/Orphanet_99978None[MONDO:0003345]
3759http://www.orpha.net/ORDO/Orphanet_99981None[]
3847http://www.orpha.net/ORDO/Orphanet_99983None[]
85668http://www.orpha.net/ORDO/Orphanet_99989None[]
4382http://www.orpha.net/ORDO/Orphanet_99990None[]
45866http://www.orpha.net/ORDO/Orphanet_99991None[]
21041http://www.orpha.net/ORDO/Orphanet_99994None[]
65561http://www.orpha.net/ORDO/Orphanet_99995None[]
43256http://www.w3.org/2000/01/rdf-schema#seeAlsoseeAlso
21673http://www.w3.org/2002/07/owl#ThingNone
15400http://www.w3.org/2002/07/owl#topObjectPropertyNone
\n", "

110240 rows × 3 columns

\n", "
" ], "text/plain": [ " id \\\n", "18057 16p11.2 deletion syndrome \n", "105646 2-Methylacetoacetyl CoA thiolase deficiency \n", "41905 2-hydroxyethyl methacrylate sensitization \n", "29133 22q11.2 duplication syndrome \n", "100428 22q13.3 deletion syndrome \n", "96122 2q37 deletion syndrome \n", "88482 3 Methylcrotonyl-CoA carboxylase 1 deficiency \n", "34501 3 alpha methylcrotonyl-CoA carboxylase 2 defic... \n", "85670 3-alpha hydroxyacyl-CoA dehydrogenase deficiency \n", "77929 3p deletion syndrome \n", "95095 46 XX Gonadal dysgenesis epibulbar dermoid \n", "90032 5-Nucleotidase syndrome \n", "74374 6 alpha mercaptopurine sensitivity \n", "51486 ACTH-independent macronodular adrenal hyperplasia \n", "26334 AIDS Dementia Complex \n", "12881 AIDS dysmorphic syndrome \n", "77859 ALK+ histiocytosis \n", "26275 ALS-like syndrome of encephalomyopathy \n", "60831 Abderhalden Kaufmann Lignac syndrome \n", "10975 Abdominal chemodectomas with cutaneous angioli... \n", "108580 Abdominal cystic lymphangioma \n", "94496 Aberrant subclavian artery \n", "93116 Abidi X-linked mental retardation syndrome \n", "40555 Absence of fingerprints congenital milia \n", "7562 Absence of gluteal muscle \n", "6259 Absence of tibia with polydactyly \n", "75137 Absent T lymphocytes \n", "49480 Absent breasts and nipples \n", "56640 Abuse dwarfism syndrome \n", "8138 Acanthamoeba infection \n", "... ... 
\n", "25120 http://www.orpha.net/ORDO/Orphanet_99948 \n", "21037 http://www.orpha.net/ORDO/Orphanet_99949 \n", "17454 http://www.orpha.net/ORDO/Orphanet_99950 \n", "20779 http://www.orpha.net/ORDO/Orphanet_99951 \n", "70495 http://www.orpha.net/ORDO/Orphanet_99952 \n", "94070 http://www.orpha.net/ORDO/Orphanet_99953 \n", "82089 http://www.orpha.net/ORDO/Orphanet_99954 \n", "106842 http://www.orpha.net/ORDO/Orphanet_99955 \n", "99610 http://www.orpha.net/ORDO/Orphanet_99956 \n", "10190 http://www.orpha.net/ORDO/Orphanet_99960 \n", "100192 http://www.orpha.net/ORDO/Orphanet_99961 \n", "30752 http://www.orpha.net/ORDO/Orphanet_99965 \n", "51288 http://www.orpha.net/ORDO/Orphanet_99966 \n", "24502 http://www.orpha.net/ORDO/Orphanet_99967 \n", "102919 http://www.orpha.net/ORDO/Orphanet_99969 \n", "79112 http://www.orpha.net/ORDO/Orphanet_99970 \n", "17426 http://www.orpha.net/ORDO/Orphanet_99971 \n", "15059 http://www.orpha.net/ORDO/Orphanet_99976 \n", "65846 http://www.orpha.net/ORDO/Orphanet_99977 \n", "6381 http://www.orpha.net/ORDO/Orphanet_99978 \n", "3759 http://www.orpha.net/ORDO/Orphanet_99981 \n", "3847 http://www.orpha.net/ORDO/Orphanet_99983 \n", "85668 http://www.orpha.net/ORDO/Orphanet_99989 \n", "4382 http://www.orpha.net/ORDO/Orphanet_99990 \n", "45866 http://www.orpha.net/ORDO/Orphanet_99991 \n", "21041 http://www.orpha.net/ORDO/Orphanet_99994 \n", "65561 http://www.orpha.net/ORDO/Orphanet_99995 \n", "43256 http://www.w3.org/2000/01/rdf-schema#seeAlso \n", "21673 http://www.w3.org/2002/07/owl#Thing \n", "15400 http://www.w3.org/2002/07/owl#topObjectProperty \n", "\n", " label mapped_equivs \n", "18057 16p11.2 deletion syndrome \n", "105646 2-Methylacetoacetyl CoA thiolase deficiency \n", "41905 2-hydroxyethyl methacrylate sensitization \n", "29133 22q11.2 duplication syndrome \n", "100428 22q13.3 deletion syndrome \n", "96122 2q37 deletion syndrome \n", "88482 3 Methylcrotonyl-CoA carboxylase 1 deficiency \n", "34501 3 alpha methylcrotonyl-CoA 
carboxylase 2 defic... \n", "85670 3-alpha hydroxyacyl-CoA dehydrogenase deficiency \n", "77929 3p deletion syndrome \n", "95095 46 XX Gonadal dysgenesis epibulbar dermoid \n", "90032 5-Nucleotidase syndrome \n", "74374 6 alpha mercaptopurine sensitivity \n", "51486 ACTH-independent macronodular adrenal hyperplasia \n", "26334 AIDS Dementia Complex \n", "12881 AIDS dysmorphic syndrome \n", "77859 ALK+ histiocytosis \n", "26275 ALS-like syndrome of encephalomyopathy \n", "60831 Abderhalden Kaufmann Lignac syndrome \n", "10975 Abdominal chemodectomas with cutaneous angioli... \n", "108580 Abdominal cystic lymphangioma \n", "94496 Aberrant subclavian artery \n", "93116 Abidi X-linked mental retardation syndrome \n", "40555 Absence of fingerprints congenital milia \n", "7562 Absence of gluteal muscle \n", "6259 Absence of tibia with polydactyly \n", "75137 Absent T lymphocytes \n", "49480 Absent breasts and nipples \n", "56640 Abuse dwarfism syndrome \n", "8138 Acanthamoeba infection \n", "... ... ... 
\n", "25120 None [MONDO:0008961] \n", "21037 None [MONDO:0011113] \n", "17454 None [MONDO:0011085] \n", "20779 None [MONDO:0011527] \n", "70495 None [] \n", "94070 None [MONDO:0011534] \n", "82089 None [] \n", "106842 None [MONDO:0011066] \n", "99610 None [MONDO:0011475] \n", "10190 None [] \n", "100192 None [] \n", "30752 None [] \n", "51288 None [] \n", "24502 None [] \n", "102919 None [] \n", "79112 None [] \n", "17426 None [] \n", "15059 None [] \n", "65846 None [] \n", "6381 None [MONDO:0003345] \n", "3759 None [] \n", "3847 None [] \n", "85668 None [] \n", "4382 None [] \n", "45866 None [] \n", "21041 None [] \n", "65561 None [] \n", "43256 seeAlso \n", "21673 None \n", "15400 None \n", "\n", "[110240 rows x 3 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## unmapped (TODO this includes unmapped from MONDO/HP to R, which we don't care about so much)\n", "udf.to_csv('rare-no-matches.tsv', sep=\"\\t\", index=False)\n", "udf" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }