{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from __future__ import print_function\n", "\n", "import csv\n", "import sys\n", "import xml.etree.ElementTree as ET\n", " \n", "import pandas as pd\n", "import requests\n", "\n", "if sys.version_info.major == 2:\n", " from StringIO import StringIO\n", "else:\n", " from io import StringIO" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [
"def get_psiquic(service, query, full_url=False, **kwargs):\n",
"    \"\"\"Send a GET request to a PSICQUIC REST endpoint and return the raw body.\n",
"\n",
"    service: path below the EBI PSICQUIC server, or -- when full_url is\n",
"        true -- the complete base URL of a service.\n",
"    query: path fragment appended after service.\n",
"    full_url: if true, service is used as an absolute base URL instead of\n",
"        being placed under the EBI server.\n",
"    kwargs: forwarded as URL query parameters; 'format' defaults to 'tab27'.\n",
"\n",
"    Returns the undecoded response body (the .content of the response).\n",
"    Raises requests.HTTPError for a 4xx/5xx answer and\n",
"    requests.Timeout when the server stops responding.\n",
"    \"\"\"\n",
"    kwargs.setdefault('format', 'tab27')\n",
"    if full_url:\n",
"        base = service\n",
"    else:\n",
"        base = 'http://www.ebi.ac.uk/Tools/webservices/psicquic/%s' % service\n",
"    # Join with exactly one '/' so a base URL works with or without a trailing slash.\n",
"    url = '%s/%s' % (base.rstrip('/'), query.lstrip('/'))\n",
"    # Without a timeout a single dead service would block the notebook forever.\n",
"    req = requests.get(url, params=kwargs, timeout=60)\n",
"    req.raise_for_status()  # does nothing unless the status is 4xx/5xx\n",
"    return req.content" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nameorg_url
1BioGridhttp://www.thebiogrid.org/
2bhf-uclhttp://www.ucl.ac.uk/functional-gene-annotation/cardiovascular/projects
3ChEMBLhttp://www.ebi.ac.uk/chembl
4DIPhttp://dip.doe-mbi.ucla.edu/
5HPIDbhttp://www.agbase.msstate.edu/hpi/main.html
6InnateDBhttp://www.innatedb.com
7IntActhttp://www.ebi.ac.uk/intact
8menthahttp://mentha.uniroma2.it/
9MPIDBhttp://jcvi.org/mpidb/
11MatrixDBhttp://matrixdb.ibcp.fr/
12MINThttp://mint.bio.uniroma2.it/
13Reactomehttp://www.reactome.org/
14Reactome-FIshttp://www.reactome.org/
16BINDhttp://www.baderlab.org
17Interoporchttp://biodev.extra.cea.fr/interoporc
20I2D-IMExhttp://ophid.utoronto.ca/
21InnateDB-IMExhttp://www.innatedb.ca/
22MolConhttp://www.molecularconnections.com
23UniProthttp://www.uniprot.org
24MBInfohttp://www.mechanobio.info/
25BindingDBhttp://www.bindingdb.org
26VirHostNethttp://pbildb1.univ-lyon1.fr/virhostnet
28SpikeNone
\n", "
" ], "text/plain": [ " name \\\n", "1 BioGrid \n", "2 bhf-ucl \n", "3 ChEMBL \n", "4 DIP \n", "5 HPIDb \n", "6 InnateDB \n", "7 IntAct \n", "8 mentha \n", "9 MPIDB \n", "11 MatrixDB \n", "12 MINT \n", "13 Reactome \n", "14 Reactome-FIs \n", "16 BIND \n", "17 Interoporc \n", "20 I2D-IMEx \n", "21 InnateDB-IMEx \n", "22 MolCon \n", "23 UniProt \n", "24 MBInfo \n", "25 BindingDB \n", "26 VirHostNet \n", "28 Spike \n", "\n", " org_url \n", "1 http://www.thebiogrid.org/ \n", "2 http://www.ucl.ac.uk/functional-gene-annotation/cardiovascular/projects \n", "3 http://www.ebi.ac.uk/chembl \n", "4 http://dip.doe-mbi.ucla.edu/ \n", "5 http://www.agbase.msstate.edu/hpi/main.html \n", "6 http://www.innatedb.com \n", "7 http://www.ebi.ac.uk/intact \n", "8 http://mentha.uniroma2.it/ \n", "9 http://jcvi.org/mpidb/ \n", "11 http://matrixdb.ibcp.fr/ \n", "12 http://mint.bio.uniroma2.it/ \n", "13 http://www.reactome.org/ \n", "14 http://www.reactome.org/ \n", "16 http://www.baderlab.org \n", "17 http://biodev.extra.cea.fr/interoporc \n", "20 http://ophid.utoronto.ca/ \n", "21 http://www.innatedb.ca/ \n", "22 http://www.molecularconnections.com \n", "23 http://www.uniprot.org \n", "24 http://www.mechanobio.info/ \n", "25 http://www.bindingdb.org \n", "26 http://pbildb1.univ-lyon1.fr/virhostnet \n", "28 None " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
"def get_databases(db_xml):\n",
"    \"\"\"Yield one record dict per service element of the parsed registry.\n",
"\n",
"    db_xml: iterable of service elements, e.g. the root element that\n",
"        ET.fromstring returns for the registry XML.\n",
"\n",
"    Every dict carries the keys 'name', 'active', 'org_url', 'example'\n",
"    and 'rest_url'. The fields start from defaults for each service, so\n",
"    a child element that is absent yields None (False for 'active')\n",
"    rather than the value left over from the previous service.\n",
"    \"\"\"\n",
"    # XML tag (namespace removed) -> key of the yielded record\n",
"    text_fields = {'name': 'name', 'restUrl': 'rest_url',\n",
"                   'restExample': 'example', 'organizationUrl': 'org_url'}\n",
"    for service in db_xml:\n",
"        record = {'name': None, 'active': False, 'org_url': None,\n",
"                  'example': None, 'rest_url': None}\n",
"        for elem in service:\n",
"            # '{namespace}tag' -> 'tag'; find() gives -1 when there is no '}',\n",
"            # which keeps the tag unchanged\n",
"            ns_clean_tag = elem.tag[elem.tag.find('}') + 1:]\n",
"            if ns_clean_tag == 'active':\n",
"                # anything but the literal 'false' counts as active\n",
"                record['active'] = elem.text != 'false'\n",
"            elif ns_clean_tag in text_fields:\n",
"                record[text_fields[ns_clean_tag]] = elem.text\n",
"            # the few remaining tags are not used here\n",
"        yield record\n",
"\n",
"dbs_xml = get_psiquic('registry', 'registry', action='STATUS', format='xml')\n",
"dbs_xml_parsed = ET.fromstring(dbs_xml)\n",
"dbs = pd.DataFrame.from_records(get_databases(dbs_xml_parsed))\n",
"\n",
"pd.options.display.max_colwidth = 100\n",
"active_dbs = dbs[dbs.active]\n",
"# axis must be given by keyword: pandas 2.0 no longer accepts it positionally\n",
"active_dbs.drop(['active', 'example', 'rest_url'], axis=1)" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4802\n", "DB: BioGrid, count: 2375\n", "DB: bhf-ucl, count: 14\n", "DB: ChEMBL, count: 80\n", "DB: DIP, count: 0\n", "DB: HPIDb, count: 74\n", "DB: InnateDB, count: 136\n", "DB: IntAct, count: 4802\n", "DB: mentha, count: 3217\n", "DB: MPIDB, count: 0\n", "DB: MatrixDB, count: 0\n", "DB: MINT, count: 2158\n", "DB: Reactome, count: 0\n", "DB: Reactome-FIs, count: 369\n", "DB: BIND, count: 47\n", "DB: Interoporc, count: 0\n", "DB: I2D-IMEx, count: 194\n", "DB: InnateDB-IMEx, count: 5\n", "DB: MolCon, count: 18\n", "DB: UniProt, count: 438\n", "DB: MBInfo, count: 0\n", "DB: BindingDB, count: 0\n", "DB: VirHostNet, count: 0\n", "DB: Spike, count: 450\n" ] } ], "source": [
"intact_count = get_psiquic('intact/webservices/current/search/query', 'tp53', format='count')\n",
"print(int(intact_count))\n",
"for _, db in active_dbs.iterrows():\n",
"    try:\n",
"        raw_count = get_psiquic(db['rest_url'], 'query/tp53',\n",
"                                full_url=True, format='count')\n",
"        count = int(raw_count)\n",
"    except (requests.RequestException, ValueError) as error:\n",
"        # one unreachable or misbehaving service must not abort the whole survey\n",
"        print('DB: %s, failed: %s' % (db['name'], error))\n",
"        continue\n",
"    print('DB: %s, count: %d' % (db['name'], count))" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "set(['uniprotkb', 'ddbj/embl/genbank', '-', 'ensembl', 'intact', 'chebi'])\n" ] } ], "source": [
"mitab = get_psiquic('intact/webservices/current/search/query', 'tp53',\n",
"                    firstResult=0, maxResults=1000)\n",
"if not isinstance(mitab, str):\n",
"    # Python 3: the body is bytes but csv needs text (on Python 2 bytes is str)\n",
"    mitab = mitab.decode('utf-8')\n",
"answer = csv.reader(StringIO(mitab), delimiter='\\t')\n",
"db_types = set()\n",
"for record in answer:\n",
"    if len(record) < 2:\n",
"        continue  # blank or short row: lacks the two columns read below\n",
"    db_types.add(record[0].split(':')[0])\n",
"    db_types.add(record[1].split(':')[0])\n",
"print(db_types)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }