{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from chembl_webresource_client.unichem import unichem_client as unichem" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get src_compound_ids from src_compound_id" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain a list of all `src_compound_ids` from all sources which are **CURRENTLY** assigned to the same structure as a currently assigned query `src_compound_id`. The output will include query `src_compound_id` if it is a valid `src_compound_id` with a current assignment." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.get('CHEMBL12',1)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "25" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'src_compound_id': u'536763', u'src_id': u'10'},\n", " {u'src_compound_id': u'11100-37-1', u'src_id': u'26'},\n", " {u'src_compound_id': u'J2.044C', u'src_id': u'29'},\n", " {u'src_compound_id': u'SAM001246536', u'src_id': u'8'},\n", " {u'src_compound_id': u'diazepam', u'src_id': u'12'},\n", " {u'src_compound_id': u'Q3JTX2Q7TU', u'src_id': u'14'},\n", " {u'src_compound_id': u'ZINC00006427', u'src_id': u'9'},\n", " {u'src_compound_id': u'LSM-2359', u'src_id': u'25'},\n", " {u'src_compound_id': u'CHEMBL12', u'src_id': u'1'},\n", " {u'src_compound_id': u'PA449283', u'src_id': u'17'},\n", " {u'src_compound_id': u'HMDB14967', u'src_id': u'18'},\n", " {u'src_compound_id': u'10016206', u'src_id': u'24'},\n", " {u'src_compound_id': u'49575', u'src_id': u'7'},\n", " {u'src_compound_id': u'14799843', u'src_id': u'21'},\n", " {u'src_compound_id': 
u'3016', u'src_id': u'22'},\n", " {u'src_compound_id': u'3364', u'src_id': u'4'},\n", " {u'src_compound_id': u'C06948', u'src_id': u'6'},\n", " {u'src_compound_id': u'DB00829', u'src_id': u'2'},\n", " {u'src_compound_id': u'MCULE-8990989144', u'src_id': u'23'},\n", " {u'src_compound_id': u'50000766', u'src_id': u'31'},\n", " {u'src_compound_id': u'DZP', u'src_id': u'3'},\n", " {u'src_compound_id': u'10C2DCBD238615091B9B54A475769A51', u'src_id': u'11'},\n", " {u'src_compound_id': u'C06948', u'src_id': u'27'},\n", " {u'src_compound_id': u'439-14-5', u'src_id': u'26'},\n", " {u'src_compound_id': u'SCHEMBL21442', u'src_id': u'15'}]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note also, that by adding an additional (optional) argument (a valid `src_id`), then results will be restricted to only the source specified with this optional argument." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.get('CHEMBL12',1,2)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'src_compound_id': u'DB00829'}]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get all src_compound_ids from src_compound_id " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain a list of *all* `src_compound_ids` from all sources (including **BOTH** current **AND** obsolete assignments) to the same structure as a currently assigned query `src_compound_id`. 
The output will include query `src_compound_id` if it is a valid `src_compound_id` with a current assignment." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.get('CHEMBL12', 1, all=True)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "26" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'assignment': u'1', u'src_compound_id': u'536763', u'src_id': u'10'},\n", " {u'assignment': u'1', u'src_compound_id': u'11100-37-1', u'src_id': u'26'},\n", " {u'assignment': u'1', u'src_compound_id': u'J2.044C', u'src_id': u'29'},\n", " {u'assignment': u'1', u'src_compound_id': u'SAM001246536', u'src_id': u'8'},\n", " {u'assignment': u'1', u'src_compound_id': u'diazepam', u'src_id': u'12'},\n", " {u'assignment': u'1', u'src_compound_id': u'Q3JTX2Q7TU', u'src_id': u'14'},\n", " {u'assignment': u'1', u'src_compound_id': u'ZINC00006427', u'src_id': u'9'},\n", " {u'assignment': u'1', u'src_compound_id': u'LSM-2359', u'src_id': u'25'},\n", " {u'assignment': u'1', u'src_compound_id': u'CHEMBL12', u'src_id': u'1'},\n", " {u'assignment': u'1', u'src_compound_id': u'PA449283', u'src_id': u'17'},\n", " {u'assignment': u'1', u'src_compound_id': u'HMDB14967', u'src_id': u'18'},\n", " {u'assignment': u'1', u'src_compound_id': u'10016206', u'src_id': u'24'},\n", " {u'assignment': u'1', u'src_compound_id': u'49575', u'src_id': u'7'},\n", " {u'assignment': u'1', u'src_compound_id': u'14799843', u'src_id': u'21'},\n", " {u'assignment': u'1', u'src_compound_id': u'3016', u'src_id': u'22'},\n", " {u'assignment': u'1', u'src_compound_id': u'3364', u'src_id': u'4'},\n", " {u'assignment': u'1', u'src_compound_id': u'C06948', u'src_id': u'6'},\n", " {u'assignment': 
u'1', u'src_compound_id': u'DB00829', u'src_id': u'2'},\n", " {u'assignment': u'0', u'src_compound_id': u'DB07699', u'src_id': u'2'},\n", " {u'assignment': u'1',\n", " u'src_compound_id': u'MCULE-8990989144',\n", " u'src_id': u'23'},\n", " {u'assignment': u'1', u'src_compound_id': u'50000766', u'src_id': u'31'},\n", " {u'assignment': u'1', u'src_compound_id': u'DZP', u'src_id': u'3'},\n", " {u'assignment': u'1',\n", " u'src_compound_id': u'10C2DCBD238615091B9B54A475769A51',\n", " u'src_id': u'11'},\n", " {u'assignment': u'1', u'src_compound_id': u'C06948', u'src_id': u'27'},\n", " {u'assignment': u'1', u'src_compound_id': u'439-14-5', u'src_id': u'26'},\n", " {u'assignment': u'1', u'src_compound_id': u'SCHEMBL21442', u'src_id': u'15'}]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note also, that by adding an additional (optional) argument (a valid `src_id`), then results will be restricted to only the source specified with this optional argument." 
] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.get('CHEMBL12', 1, 2, all=True)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'assignment': u'1', u'src_compound_id': u'DB00829'},\n", " {u'assignment': u'0', u'src_compound_id': u'DB07699'}]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get mapping" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain a full mapping between two sources. Uses only currently assigned `src_compound_ids` from both sources. " ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.map(4,1)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "4963" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{u'1': u'CHEMBL293526', u'4': u'2670'}" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get src_compound_ids from InChI Key" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain a list of all `src_compound_ids` (from all sources) which are **CURRENTLY** assigned to a query InChIKey" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true 
}, "outputs": [], "source": [ "ret = unichem.get('AAOVKJBEBIDNHE-UHFFFAOYSA-N')" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "25" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'src_compound_id': u'536763', u'src_id': u'10'},\n", " {u'src_compound_id': u'11100-37-1', u'src_id': u'26'},\n", " {u'src_compound_id': u'J2.044C', u'src_id': u'29'},\n", " {u'src_compound_id': u'SAM001246536', u'src_id': u'8'},\n", " {u'src_compound_id': u'diazepam', u'src_id': u'12'},\n", " {u'src_compound_id': u'Q3JTX2Q7TU', u'src_id': u'14'},\n", " {u'src_compound_id': u'ZINC00006427', u'src_id': u'9'},\n", " {u'src_compound_id': u'LSM-2359', u'src_id': u'25'},\n", " {u'src_compound_id': u'CHEMBL12', u'src_id': u'1'},\n", " {u'src_compound_id': u'PA449283', u'src_id': u'17'},\n", " {u'src_compound_id': u'HMDB14967', u'src_id': u'18'},\n", " {u'src_compound_id': u'10016206', u'src_id': u'24'},\n", " {u'src_compound_id': u'49575', u'src_id': u'7'},\n", " {u'src_compound_id': u'14799843', u'src_id': u'21'},\n", " {u'src_compound_id': u'3016', u'src_id': u'22'},\n", " {u'src_compound_id': u'3364', u'src_id': u'4'},\n", " {u'src_compound_id': u'C06948', u'src_id': u'6'},\n", " {u'src_compound_id': u'DB00829', u'src_id': u'2'},\n", " {u'src_compound_id': u'MCULE-8990989144', u'src_id': u'23'},\n", " {u'src_compound_id': u'50000766', u'src_id': u'31'},\n", " {u'src_compound_id': u'DZP', u'src_id': u'3'},\n", " {u'src_compound_id': u'10C2DCBD238615091B9B54A475769A51', u'src_id': u'11'},\n", " {u'src_compound_id': u'C06948', u'src_id': u'27'},\n", " {u'src_compound_id': u'439-14-5', u'src_id': u'26'},\n", " {u'src_compound_id': u'SCHEMBL21442', u'src_id': u'15'}]" ] }, "execution_count": 19, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get src_compound_ids all from InChIKey " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain a list of *all* `src_compound_ids` (from all sources) which have current **AND** obsolete assignments to a query InChIKey" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.get('AAOVKJBEBIDNHE-UHFFFAOYSA-N', all=True)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "26" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'assignment': u'1', u'src_compound_id': u'536763', u'src_id': u'10'},\n", " {u'assignment': u'1', u'src_compound_id': u'11100-37-1', u'src_id': u'26'},\n", " {u'assignment': u'1', u'src_compound_id': u'J2.044C', u'src_id': u'29'},\n", " {u'assignment': u'1', u'src_compound_id': u'SAM001246536', u'src_id': u'8'},\n", " {u'assignment': u'1', u'src_compound_id': u'diazepam', u'src_id': u'12'},\n", " {u'assignment': u'1', u'src_compound_id': u'Q3JTX2Q7TU', u'src_id': u'14'},\n", " {u'assignment': u'1', u'src_compound_id': u'ZINC00006427', u'src_id': u'9'},\n", " {u'assignment': u'1', u'src_compound_id': u'LSM-2359', u'src_id': u'25'},\n", " {u'assignment': u'1', u'src_compound_id': u'CHEMBL12', u'src_id': u'1'},\n", " {u'assignment': u'1', u'src_compound_id': u'PA449283', u'src_id': u'17'},\n", " {u'assignment': u'1', u'src_compound_id': u'HMDB14967', u'src_id': u'18'},\n", " {u'assignment': u'1', u'src_compound_id': u'10016206', u'src_id': u'24'},\n", " {u'assignment': u'1', u'src_compound_id': u'49575', u'src_id': u'7'},\n", " {u'assignment': u'1', u'src_compound_id': u'14799843', u'src_id': u'21'},\n", " 
{u'assignment': u'1', u'src_compound_id': u'3016', u'src_id': u'22'},\n", " {u'assignment': u'1', u'src_compound_id': u'3364', u'src_id': u'4'},\n", " {u'assignment': u'1', u'src_compound_id': u'C06948', u'src_id': u'6'},\n", " {u'assignment': u'1', u'src_compound_id': u'DB00829', u'src_id': u'2'},\n", " {u'assignment': u'0', u'src_compound_id': u'DB07699', u'src_id': u'2'},\n", " {u'assignment': u'1',\n", " u'src_compound_id': u'MCULE-8990989144',\n", " u'src_id': u'23'},\n", " {u'assignment': u'1', u'src_compound_id': u'50000766', u'src_id': u'31'},\n", " {u'assignment': u'1', u'src_compound_id': u'DZP', u'src_id': u'3'},\n", " {u'assignment': u'1',\n", " u'src_compound_id': u'10C2DCBD238615091B9B54A475769A51',\n", " u'src_id': u'11'},\n", " {u'assignment': u'1', u'src_compound_id': u'C06948', u'src_id': u'27'},\n", " {u'assignment': u'1', u'src_compound_id': u'439-14-5', u'src_id': u'26'},\n", " {u'assignment': u'1', u'src_compound_id': u'SCHEMBL21442', u'src_id': u'15'}]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get all src_ids " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain all `src_ids` currently in UniChem" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.src()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "27" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'src_id': u'1'},\n", " {u'src_id': u'2'},\n", " {u'src_id': u'3'},\n", " {u'src_id': u'4'},\n", " {u'src_id': u'5'},\n", " {u'src_id': u'6'},\n", " {u'src_id': u'7'},\n", " {u'src_id': u'8'},\n", " {u'src_id': 
u'9'},\n", " {u'src_id': u'10'},\n", " {u'src_id': u'11'},\n", " {u'src_id': u'12'},\n", " {u'src_id': u'14'},\n", " {u'src_id': u'15'},\n", " {u'src_id': u'17'},\n", " {u'src_id': u'18'},\n", " {u'src_id': u'20'},\n", " {u'src_id': u'21'},\n", " {u'src_id': u'22'},\n", " {u'src_id': u'23'},\n", " {u'src_id': u'24'},\n", " {u'src_id': u'25'},\n", " {u'src_id': u'26'},\n", " {u'src_id': u'27'},\n", " {u'src_id': u'28'},\n", " {u'src_id': u'29'},\n", " {u'src_id': u'31'}]" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get source information " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain *all* information on a source by querying with a source id (`src_id`)." ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.src(1)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'aux_for_url': u'0',\n", " u'base_id_url': u'https://www.ebi.ac.uk/chembldb/compound/inspect/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'A database of bioactive drug-like small molecules and bioactivities abstracted from the scientific literature.',\n", " u'name': u'chembl',\n", " u'name_label': u'ChEMBL',\n", " u'name_long': u'ChEMBL',\n", " u'src_id': u'1',\n", " u'src_url': u'https://www.ebi.ac.uk/chembl/'}]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get structure" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain 
structure(s) **CURRENTLY** assigned to a query `src_compound_id`." ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.structure('CHEMBL12',1)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'standardinchi': u'InChI=1S/C16H13ClN2O/c1-19-14-8-7-12(17)9-13(14)16(18-10-15(19)20)11-5-3-2-4-6-11/h2-9H,10H2,1H3',\n", " u'standardinchikey': u'AAOVKJBEBIDNHE-UHFFFAOYSA-N'}]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get all structures " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain structure(s) with current **AND** obsolete assignments to a query `src_compound_id`." 
] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.structure('CHEMBL12',1, all=True)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'assignment': u'1',\n", " u'standardinchi': u'InChI=1S/C16H13ClN2O/c1-19-14-8-7-12(17)9-13(14)16(18-10-15(19)20)11-5-3-2-4-6-11/h2-9H,10H2,1H3',\n", " u'standardinchikey': u'AAOVKJBEBIDNHE-UHFFFAOYSA-N'}]" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get URL for src_compound_ids from src_compound_id " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain a list of URLs for all `src_compound_ids`, from a specified source (the `to_src_id`), which are **CURRENTLY** assigned to the same structure as a currently assigned query `src_compound_id`. 
Method only applicable for sources which support direct URLs to `src_compound_id` pages.\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.get('CHEMBL12',1, 2, url=True)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'url': u'http://www.drugbank.ca/drugs/DB00829'}]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The method is also applicable to `to_src_id`s where the hyperlink is constructed from auxiliary data (and not from the `src_compound_id`), as in the example below." ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.get('CHEMBL490',1, 15, url=True)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'url': u'https://www.surechembl.org/chemical/SCHEMBL27799'}]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get src_compound_ids all from obsolete src_compound_id " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain a list of all `src_compound_ids` from all sources with **BOTH** current **AND** obsolete assignments to the same structure with an obsolete 
assignment to the query `src_compound_id`. The output will include query `src_compound_id` if it is a valid `src_compound_id` with an obsolete assignment.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.get('DB07699',2)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'url': u'https://www.surechembl.org/chemical/SCHEMBL27799'}]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note also, that by adding an additional (optional) argument (a valid `src_id`), then results will be restricted to only the source specified with this optional argument." ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.get('DB07699',2,1)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'UCI': u'304698', u'assignment': u'1', u'src_compound_id': u'CHEMBL12'}]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get verbose src_compound_ids from InChIKey" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain all `src_compound_ids` (from all sources) which are **CURRENTLY** assigned to a query InChIKey. 
However, these are returned as part of the following data structure: A list of sources containing these `src_compound_ids`, including source description, base_id_url, etc. One element in this list is a list of the `src_compound_ids` currently assigned to the query InChIKey." ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.get('AAOVKJBEBIDNHE-UHFFFAOYSA-N', verbose=True)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "24" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'aux_for_url': u'0',\n", " u'base_id_url': u'https://www.ebi.ac.uk/chembldb/compound/inspect/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'A database of bioactive drug-like small molecules and bioactivities abstracted from the scientific literature.',\n", " u'name': u'chembl',\n", " u'name_label': u'ChEMBL',\n", " u'name_long': u'ChEMBL',\n", " u'src_compound_id': [u'CHEMBL12'],\n", " u'src_id': u'1',\n", " u'src_url': u'https://www.ebi.ac.uk/chembl/'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://www.drugbank.ca/drugs/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'A database that combines drug (i.e. chemical, pharmacological and pharmaceutical) data with drug target (i.e. 
sequence, structure, and pathway) information.',\n", " u'name': u'drugbank',\n", " u'name_label': u'DrugBank',\n", " u'name_long': u'DrugBank',\n", " u'src_compound_id': [u'DB00829'],\n", " u'src_id': u'2',\n", " u'src_url': u'http://drugbank.ca/'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://www.ebi.ac.uk/pdbe-srv/pdbechem/chemicalCompound/show/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'The European resource for the collection, organisation and dissemination of data on biological macromolecular structures, including structures of small molecule ligands for proteins.',\n", " u'name': u'pdb',\n", " u'name_label': u'PDBe',\n", " u'name_long': u'PDBe (Protein Data Bank Europe)',\n", " u'src_compound_id': [u'DZP'],\n", " u'src_id': u'3',\n", " u'src_url': u'http://www.ebi.ac.uk/pdbe/'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://www.guidetopharmacology.org/GRAC/LigandDisplayForward?ligandId=',\n", " u'base_id_url_available': u'1',\n", " u'description': u'The IUPHAR (International Union of Basic and Clinical Pharmacology)/BPS (British Pharmacological Society) Guide to PHARMACOLOGY database contains structures of small molecule ligands, peptides and antibodies, with their affinities at protein targets.',\n", " u'name': u'gtopdb',\n", " u'name_label': u'Guide to Pharmacology',\n", " u'name_long': u'Guide to Pharmacology',\n", " u'src_compound_id': [u'3364'],\n", " u'src_id': u'4',\n", " u'src_url': u'http://www.guidetopharmacology.org'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://www.genome.jp/dbget-bin/www_bget?',\n", " u'base_id_url_available': u'1',\n", " u'description': u'KEGG LIGAND is a composite DB consisting of COMPOUND, GLYCAN, REACTION, RPAIR, RCLASS, and ENZYME DBs, whose entries are identified by C, G, R, RP, RC, and EC numbers, respectively.',\n", " u'name': u'kegg_ligand',\n", " u'name_label': u'KEGG Ligand',\n", " u'name_long': u'KEGG (Kyoto Encyclopedia of Genes and Genomes) 
Ligand',\n", " u'src_compound_id': [u'C06948'],\n", " u'src_id': u'6',\n", " u'src_url': u'http://www.genome.jp/kegg/ligand.html'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI%3A',\n", " u'base_id_url_available': u'1',\n", " u'description': u\"ChEBI is a freely available dictionary of molecular entities focused on 'small' chemical compounds\",\n", " u'name': u'chebi',\n", " u'name_label': u'ChEBI',\n", " u'name_long': u'ChEBI (Chemical Entities of Biological Interest).',\n", " u'src_compound_id': [u'49575'],\n", " u'src_id': u'7',\n", " u'src_url': u'http://www.ebi.ac.uk/chebi/downloadsForward.do'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': None,\n", " u'base_id_url_available': u'0',\n", " u'description': u'Collections of plated arrays of small molecules that have a history of use in human clinical trials. Assembled by the National Institutes of Health (NIH) through the Molecular Libraries Roadmap Initiative',\n", " u'name': u'nih_ncc',\n", " u'name_label': u'NIH Clinical Collection',\n", " u'name_long': u'NIH Clinical Collection',\n", " u'src_compound_id': [u'SAM001246536'],\n", " u'src_id': u'8',\n", " u'src_url': u'http://nihsmr.evotec.com/evotec/'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://zinc.docking.org/substance/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'A free database of commercially-available compounds for virtual screening, provided by the Shoichet Laboratory in the Department of Pharmaceutical Chemistry at the University of California, San Francisco (UCSF). [Irwin and Shoichet, J. Chem. Inf. Model. 
2005;45(1):177-82]',\n", " u'name': u'zinc',\n", " u'name_label': u'ZINC',\n", " u'name_long': u'ZINC',\n", " u'src_compound_id': [u'ZINC00006427'],\n", " u'src_id': u'9',\n", " u'src_url': u'http://zinc.docking.org/'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'https://www.emolecules.com/cgi-bin/more?vid=',\n", " u'base_id_url_available': u'1',\n", " u'description': u'A free chemical structure search engine containing millions of public domain structures. Pricing, availabilities, and vendor information requires an eMolecules Plus subscription.',\n", " u'name': u'emolecules',\n", " u'name_label': u'eMolecules',\n", " u'name_long': u'eMolecules',\n", " u'src_compound_id': [u'536763'],\n", " u'src_id': u'10',\n", " u'src_url': u'https://www.emolecules.com/'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://www-935.ibm.com/services/us/gbs/bao/siip/nih/?sid=',\n", " u'base_id_url_available': u'1',\n", " u'description': u'The data are provided by IBM-NIH and include all chemistry extracted by means of text and image mining from the patent corpus (USPTO, WIPO and EPO) for patent documents published through 31-12-2010. 
Identifiers in UniChem are IBM compound identifiers.',\n", " u'name': u'ibm',\n", " u'name_label': u'IBM Patent System',\n", " u'name_long': u'IBM strategic IP insight platform and the National Institutes of Health',\n", " u'src_compound_id': [u'10C2DCBD238615091B9B54A475769A51'],\n", " u'src_id': u'11',\n", " u'src_url': u'http://www-935.ibm.com/services/us/gbs/bao/siip/nih/'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://www.ebi.ac.uk/gxa/query?geneQuery=&exactMatch=true&_exactMatch=on&organism=Any&condition=%22',\n", " u'base_id_url_available': u'1',\n", " u'description': u'The Gene Expression Atlas is a semantically enriched database of meta-analysis based summary statistics over a curated subset of ArrayExpress Archive, servicing queries for condition-specific gene expression patterns as well as broader exploratory searches for biologically interesting genes/samples.',\n", " u'name': u'atlas',\n", " u'name_label': u'Atlas',\n", " u'name_long': u'Gene Expression Atlas',\n", " u'src_compound_id': [u'diazepam'],\n", " u'src_id': u'12',\n", " u'src_url': u'http://www.ebi.ac.uk/gxa/home'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://fdasis.nlm.nih.gov/srs/ProxyServlet?mergeData=true&objectHandle=DBMaint&APPLICATION_NAME=fdasrs&actionHandle=default&nextPage=jsp/srs/ResultScreen.jsp&TXTSUPERLISTID=',\n", " u'base_id_url_available': u'1',\n", " u'description': u'The primary goal of the FDA/USP Substance Registration System (SRS) is to unambiguously define all substances present in regulated products. Once a substance has been defined, the SRS assigns a strong identifier that is permanently associated with the substance: a UNII (Unique Ingredient Identifier). 
This is a a non-proprietary, free, unique, unambiguous, nonsemantic, alphanumeric identifier based on a substances molecular structure and/or descriptive information.',\n", " u'name': u'fdasrs',\n", " u'name_label': u'FDA SRS',\n", " u'name_long': u'FDA/USP Substance Registration System (SRS)',\n", " u'src_compound_id': [u'Q3JTX2Q7TU'],\n", " u'src_id': u'14',\n", " u'src_url': u'http://fdasis.nlm.nih.gov/srs/srs.jsp'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'https://www.surechembl.org/chemical/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'SureChEMBL automatically extracts chemistry from the full text of all major patent authorities. Compounds are derived from either chemical names found in text or in chemical depictions. All SureChEMBL compounds are included, except those failing UniChem loading rules.',\n", " u'name': u'surechembl',\n", " u'name_label': u'SureChEMBL',\n", " u'name_long': u'SureChEMBL',\n", " u'src_compound_id': [u'SCHEMBL21442'],\n", " u'src_id': u'15',\n", " u'src_url': u'https://www.surechembl.org/search/'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://www.pharmgkb.org/drug/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'PharmGKB (Pharmacogenomics Knowledgebase) is a comprehensive resource that curates knowledge about the impact of genetic variation on drug response for clinicians and researchers.',\n", " u'name': u'pharmgkb',\n", " u'name_label': u'PharmGKB',\n", " u'name_long': u'PharmGKB',\n", " u'src_compound_id': [u'PA449283'],\n", " u'src_id': u'17',\n", " u'src_url': u'http://www.pharmgkb.org'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://www.hmdb.ca/metabolites/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'The Human Metabolome Database (HMDB) is a freely available electronic database containing detailed information about small molecule metabolites found in the human body. 
It is intended to be used for applications in metabolomics, clinical chemistry, biomarker discovery and general education. The database is designed to contain or link three kinds of data: 1) chemical data, 2) clinical data, and 3) molecular biology/biochemistry data',\n", " u'name': u'hmdb',\n", " u'name_label': u'Human Metabolome Database',\n", " u'name_long': u'Human Metabolome Database (HMDB)',\n", " u'src_compound_id': [u'HMDB14967'],\n", " u'src_id': u'18',\n", " u'src_url': u'http://www.hmdb.ca'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://pubchem.ncbi.nlm.nih.gov/substance/',\n", " u'base_id_url_available': u'1',\n", " u'description': u\"A subset of the PubChem DB: from the original depositor 'Thomson Pharma'.\",\n", " u'name': u'pubchem_tpharma',\n", " u'name_label': u'PubChem: Thomson Pharma ',\n", " u'name_long': u\"PubChem ('Thomson Pharma' subset)\",\n", " u'src_compound_id': [u'14799843'],\n", " u'src_id': u'21',\n", " u'src_url': u'http://www.thomson-pharma.com/'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://pubchem.ncbi.nlm.nih.gov/compound/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'A database of normalized PubChem compounds (CIDs) from the PubChem Database.',\n", " u'name': u'pubchem',\n", " u'name_label': u'PubChem',\n", " u'name_long': u'PubChem Compounds',\n", " u'src_compound_id': [u'3016'],\n", " u'src_id': u'22',\n", " u'src_url': u'http://pubchem.ncbi.nlm.nih.gov'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'https://mcule.com/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'An online drug discovery platform with virtual screening and molecular modelling services.',\n", " u'name': u'mcule',\n", " u'name_label': u'Mcule',\n", " u'name_long': u'Mcule',\n", " u'src_compound_id': [u'MCULE-8990989144'],\n", " u'src_id': u'23',\n", " u'src_url': u'https://mcule.com'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://nmrshiftdb.org/molecule/',\n", " 
u'base_id_url_available': u'1',\n", " u'description': u'An NMR database (web database) for organic structures and their nuclear magnetic resonance (nmr) spectra. It allows for spectrum prediction (13C, 1H and other nuclei) as well as for searching spectra, structures and other properties. Last not least, it features peer-reviewed submission of datasets by its users.',\n", " u'name': u'nmrshiftdb2',\n", " u'name_label': u'NMRShiftDB',\n", " u'name_long': u'NMRShiftDB',\n", " u'src_compound_id': [u'10016206'],\n", " u'src_id': u'24',\n", " u'src_url': u'http://nmrshiftdb.nmr.uni-koeln.de/portal/media-type/html/user/anon/page/default.psml/js_pane/P-Home'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://lincsportal.ccs.miami.edu/entities/#/view/',\n", " u'base_id_url_available': u'1',\n", " u'description': u'The LINCS DCIC facilitates and standardized the information relevant to LINCS assays as described in http://www.lincsproject.org/data/data-standards/',\n", " u'name': u'lincs',\n", " u'name_label': u'LINCS',\n", " u'name_long': u'Library of Integrated Network-based Cellular Signatures',\n", " u'src_compound_id': [u'LSM-2359'],\n", " u'src_id': u'25',\n", " u'src_url': u'http://www.lincsproject.org/'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://actor.epa.gov/actor/chemical.xhtml?casrn=',\n", " u'base_id_url_available': u'1',\n", " u'description': u'ACToR (Aggregated Computational Toxicology Resource)',\n", " u'name': u'actor',\n", " u'name_label': u'ACToR',\n", " u'name_long': u'ACToR',\n", " u'src_compound_id': [u'439-14-5', u'11100-37-1'],\n", " u'src_id': u'26',\n", " u'src_url': u'http://actor.epa.gov/actor/faces/ACToRHome.jsp'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://humanmetabolism.org/?page_id=7&Abbreviation=',\n", " u'base_id_url_available': u'1',\n", " u'description': u'A biochemical knowledge-base on human metabolism',\n", " u'name': u'recon',\n", " u'name_label': u'Recon',\n", " u'name_long': 
u'Recon',\n", " u'src_compound_id': [u'C06948'],\n", " u'src_id': u'27',\n", " u'src_url': u'http://humanmetabolism.org'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://nikkajiweb.jst.go.jp/nikkaji_web/pages/top_e.jsp?CONTENT=syosai&SN=',\n", " u'base_id_url_available': u'1',\n", " u'description': u' Nakkaji (The Japan Chemical Substance Dictionary) is an organic compound dictionary database prepared by the Japan Science and Technology Agency (JST).',\n", " u'name': u'nikkaji',\n", " u'name_label': u'Nikkaji',\n", " u'name_long': u'Nikkaji',\n", " u'src_compound_id': [u'J2.044C'],\n", " u'src_id': u'29',\n", " u'src_url': u' http://nikkajiweb.jst.go.jp/nikkaji_web/pages/top_e.jsp'},\n", " {u'aux_for_url': u'0',\n", " u'base_id_url': u'http://www.bindingdb.org/bind/chemsearch/marvin/MolStructure.jsp?monomerid=',\n", " u'base_id_url_available': u'1',\n", " u'description': u'A public, web-accessible database of measured binding affinities, focusing chiefly on the interactions of proteins considered to be drug-targets with small, drug-like molecules',\n", " u'name': u'bindingdb',\n", " u'name_label': u'BindingDB',\n", " u'name_long': u'BindingDB',\n", " u'src_compound_id': [u'50000766'],\n", " u'src_id': u'31',\n", " u'src_url': None}]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get auxiliary mappings " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For a single source, obtain a mapping from all current `src_compound_ids` to their corresponding auxiliary data. See [FAQ](https://www.ebi.ac.uk/unichem/info/faq#faq13) for an explanation of *auxiliary data*."
] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.map(20)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1890" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{u'auxiliary data': u'Odanacatib-(MK0822).html',\n", " u'src_compound_id': u'Odanacatib-(MK0822)'}" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get Connectivity data from InChIKey" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For an explanation of the `a`-`h` arguments, please refer to the [documentation](https://www.ebi.ac.uk/unichem/info/widesearchInfo)." ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.connectivity('QJVHTELASVOWBE-YBABNSIOSA-N', c=4, h=1)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "904" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret['1'])" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[u'BCD9A6D200C14A6D5A11BD4B8F6E002D',\n", " u'11',\n", " None,\n", " u'1',\n", " u'',\n", " u'2',\n", " u'InChI=1S/C16H19N3O5S.C8H9NO5/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7;10-2-1-4-7(8(12)13)9-5(11)3-6(9)14-4/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24);1,6-7,10H,2-3H2,(H,12,13)/b;4-1-/t9?,10-,11+,14-;6-,7-/m11/s1',\n", "
u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9?,10-,11?,14-/m1/s1',\n", " u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9?,10-,11+,14-/m1/s1',\n", " u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9?,10-,11?,14-/m1/s1',\n", " u'',\n", " u'0',\n", " u'1',\n", " u'0',\n", " u'0',\n", " u'0',\n", " u'QJVHTELASVOWBE-YBABNSIOSA-N',\n", " u'LSQZJLSUYDQPKJ-JDUQTJRRSA-N']" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret['1'][1]" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.connectivity('QJVHTELASVOWBE-YBABNSIOSA-N',a=1,c=3)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret['1'])" ] }, { "cell_type": "code", "execution_count": 57, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "23" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret['1'][0]['src_matches'])" ] }, { "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{u'B': 0,\n", " u'CpdId_InChIKey': u'ABVRVIZBZKUTMK-JSYANWSFSA-M',\n", " u'Full_CpdId_InChI': u'InChI=1S/C8H9NO5.K/c10-2-1-4-7(8(12)13)9-5(11)3-6(9)14-4;/h1,6-7,10H,2-3H2,(H,12,13);/q;+1/p-1/b4-1-;/t6-,7-;/m1./s1',\n", " u'assignment': u'1',\n", " u'aux_src': None,\n", " u'label': u'K',\n", " u'match_compare': [{u'C': u'3',\n", " u'Matching_CpdId_InChI': 
u'InChI=1S/C8H9NO5/c10-2-1-4-7(8(12)13)9-5(11)3-6(9)14-4/h1,6-7,10H,2-3H2,(H,12,13)/b4-1-/t6-,7-/m1/s1',\n", " u'Matching_Query_InChI': u'InChI=1S/C8H9NO5/c10-2-1-4-7(8(12)13)9-5(11)3-6(9)14-4/h1,6-7,10H,2-3H2,(H,12,13)/b4-1-/t6-,7-/m1/s1',\n", " u'b': u'0',\n", " u'i': u'0',\n", " u'm': u'0',\n", " u'p': u'',\n", " u's': u'0',\n", " u't': u'0'}],\n", " u'src_compound_id': u'CHEMBL1003'}" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret['1'][0]['src_matches'][3]" ] }, { "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.connectivity('QJVHTELASVOWBE',a=1,c=3)" ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret['1'])" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "23" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret['1'][0]['src_matches'])" ] }, { "cell_type": "code", "execution_count": 62, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{u'B': 0,\n", " u'CpdId_InChIKey': u'HOEMALKATXEINB-FLDSSLOCSA-M',\n", " u'Full_CpdId_InChI': u'InChI=1S/C16H19N3O5S.C8H10ClNO5S.K/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7;1-8(3-9)6(7(12)13)10-4(11)2-5(10)16(8,14)15;/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24);5-6H,2-3H2,1H3,(H,12,13);/q;;+1/p-1/t9-,10-,11+,14-;5-,6+,8+;/m11./s1',\n", " u'assignment': u'1',\n", " u'aux_src': None,\n", " u'label': u'K',\n", " u'match_compare': [{u'C': u'3',\n", " u'Matching_CpdId_InChI': u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)/t9-,10-,11+,14-/m1/s1',\n", 
" u'Matching_Query_InChI': u'InChI=1S/C16H19N3O5S/c1-16(2)11(15(23)24)19-13(22)10(14(19)25-16)18-12(21)9(17)7-3-5-8(20)6-4-7/h3-6,9-11,14,20H,17H2,1-2H3,(H,18,21)(H,23,24)',\n", " u'b': u'0',\n", " u'i': u'0',\n", " u'm': u'1',\n", " u'p': u'',\n", " u's': u'1',\n", " u't': u'1'}],\n", " u'src_compound_id': u'CHEMBL216232'}" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret['1'][0]['src_matches'][5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get Connectivity data from src_compound_id " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For the explanation of `a`-`h` arguments, please refer to the [documentation](https://www.ebi.ac.uk/unichem/info/widesearchInfo)." ] }, { "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.connectivity('CHEMBL121',1,c=4,h=1)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "460" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret['1'])" ] }, { "cell_type": "code", "execution_count": 65, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[u'ECCB9B9BF544AE556F4894E754B4D7E5',\n", " u'11',\n", " None,\n", " u'1',\n", " u'',\n", " u'1',\n", " u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',\n", " u'InChI=1S/C18H19N3O3S.C3H8.C2H6/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15;1-3-2;1-2/h2-9,15H,10-12H2,1H3,(H,20,22,23);3H2,1-2H3;1-2H3',\n", " u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',\n", " u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',\n", " 
u'',\n", " u'0',\n", " u'0',\n", " u'0',\n", " u'0',\n", " u'0',\n", " u'YASAKCUCGLMORW-UHFFFAOYSA-N',\n", " u'IOUOBPYSGZQSGK-UHFFFAOYSA-N']" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret['1'][1]" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = unichem.connectivity('CHEMBL121',1)" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "18" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret['1'])" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(ret['1'][0]['src_matches'])" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'B': 0,\n", " u'CpdId_InChIKey': u'YASAKCUCGLMORW-OAHLLOKOSA-N',\n", " u'Full_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)/t15-/m1/s1',\n", " u'assignment': u'1',\n", " u'aux_src': None,\n", " u'match_compare': [{u'C': u'0',\n", " u'Matching_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)/t15-/m1/s1',\n", " u'Matching_Query_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',\n", " u'b': u'0',\n", " u'i': u'0',\n", " u'm': u'1',\n", " u'p': u'0',\n", " u's': u'1',\n", " u't': u'1'}],\n", " u'src_compound_id': u'CHEMBL333304'},\n", " {u'B': 0,\n", " u'CpdId_InChIKey': u'YASAKCUCGLMORW-HNNXBMFYSA-N',\n", " u'Full_CpdId_InChI': 
u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)/t15-/m0/s1',\n", " u'assignment': u'1',\n", " u'aux_src': None,\n", " u'match_compare': [{u'C': u'0',\n", " u'Matching_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)/t15-/m0/s1',\n", " u'Matching_Query_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',\n", " u'b': u'0',\n", " u'i': u'0',\n", " u'm': u'1',\n", " u'p': u'0',\n", " u's': u'1',\n", " u't': u'1'}],\n", " u'src_compound_id': u'CHEMBL121106'},\n", " {u'B': 0,\n", " u'CpdId_InChIKey': u'YASAKCUCGLMORW-UHFFFAOYSA-N',\n", " u'Full_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',\n", " u'assignment': u'1',\n", " u'aux_src': None,\n", " u'match_compare': [{u'C': u'0',\n", " u'Matching_CpdId_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',\n", " u'Matching_Query_InChI': u'InChI=1S/C18H19N3O3S/c1-21(16-4-2-3-9-19-16)10-11-24-14-7-5-13(6-8-14)12-15-17(22)20-18(23)25-15/h2-9,15H,10-12H2,1H3,(H,20,22,23)',\n", " u'b': u'0',\n", " u'i': u'0',\n", " u'm': u'0',\n", " u'p': u'0',\n", " u's': u'0',\n", " u't': u'0'}],\n", " u'src_compound_id': u'CHEMBL121'}]" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret['1'][0]['src_matches']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get InChI from InChIKey " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Obtain InChI for InChIKey" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ret = 
unichem.inchiFromKey('AAOVKJBEBIDNHE-UHFFFAOYSA-N')" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[{u'standardinchi': u'InChI=1S/C16H13ClN2O/c1-19-14-8-7-12(17)9-13(14)16(18-10-15(19)20)11-5-3-2-4-6-11/h2-9H,10H2,1H3'}]" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ret" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" }, "widgets": { "state": {}, "version": "1.1.2" } }, "nbformat": 4, "nbformat_minor": 0 }