{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Get phenotypes associated with a list of genes from high content screens\n",
    "\n",
    "This notebook takes a list of gene symbols and queries the IDR for phenotypes associated with the genes in high content screens."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import json\n",
    "\n",
    "import pandas as pd\n",
    "import requests"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set up the IDR endpoint to query and an HTTP session"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "INDEX_PAGE = \"https://idr.openmicroscopy.org/webclient/?experimenter=-1\"\n",
    "\n",
    "# Create the HTTP session without a `with` block: later cells keep calling\n",
    "# session.get(), so the session must not be closed when this cell finishes.\n",
    "session = requests.Session()\n",
    "\n",
    "# Request the index page once through the session and stop here with an\n",
    "# HTTPError if the server answers with a 4xx/5xx status.\n",
    "response = session.get(INDEX_PAGE)\n",
    "response.raise_for_status()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get the list of genes to query, either from a file (one gene symbol per line) or entered directly as a list in the notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['ASH2L', 'ash2', '85441']"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# uncomment the next two lines if you'd rather read gene list in from a file \n",
    "# with open('./includes/FiveExampleGenes.txt') as f:\n",
    "#     genes = f.read().splitlines()\n",
    "\n",
    "# comment out the next line if you've read in the gene list from a file\n",
    "genes = ['ASH2L', 'ash2', '85441']\n",
    "\n",
    "# check the gene list has been read in\n",
    "genes[:5]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Set up base URLs so we can use shorter variable names later on"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source":
[
    "# Every endpoint lives on the same server; the three mapr endpoints also\n",
    "# share the '/mapr/api/{key}' prefix, so build the templates from those parts.\n",
    "IDR_BASE_URL = \"https://idr.openmicroscopy.org\"\n",
    "MAPR_API_URL = IDR_BASE_URL + \"/mapr/api/{key}\"\n",
    "\n",
    "# Placeholders are filled in later with str.format(): {key}/{value} select the\n",
    "# annotation to search for, the remaining ones narrow the search to a parent.\n",
    "SCREENS_PROJECTS_URL = MAPR_API_URL + \"/?value={value}\"\n",
    "PLATES_URL = MAPR_API_URL + \"/plates/?value={value}&id={screen_id}\"\n",
    "IMAGES_URL = MAPR_API_URL + \"/images/?value={value}&node={parent_type}&id={parent_id}\"\n",
    "ATTRIBUTES_URL = IDR_BASE_URL + \"/webclient/api/annotations/?type=map&image={image_id}\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### For each gene in the list, fetch the screens, plates, images and then the phenotypes associated with the images\n",
    "\n",
    "The results are written to a temporary .csv file, loaded into a pandas DataFrame, and the first 10 rows are displayed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GeneScreenPlateImagePhenotypePhenotype Term NamePhenotype Term AccessionPhenotype Term Accession URL
0ASH2Lidr0012-fuchs-cellmorph/screenA (2)HT281830141elongated cellselongated cell phenotypeCMPO_0000077http://www.ebi.ac.uk/cmpo/CMPO_0000077
1ASH2Lidr0012-fuchs-cellmorph/screenA (2)HT281830140elongated cellselongated cell phenotypeCMPO_0000077http://www.ebi.ac.uk/cmpo/CMPO_0000077
2ash2idr0001-graml-sysgro/screenA (60)JL_120809_S14B1239832abnormal microtubule cytoskeleton morphology d...abnormal microtubule cytoskeleton morphology d...CMPO_0000438http://www.ebi.ac.uk/cmpo/CMPO_0000438
3ash2idr0001-graml-sysgro/screenA (60)JL_120809_S14B1239830abnormal microtubule cytoskeleton morphology d...abnormal microtubule cytoskeleton morphology d...CMPO_0000438http://www.ebi.ac.uk/cmpo/CMPO_0000438
4ash2idr0001-graml-sysgro/screenA (60)JL_120809_S14B1239833abnormal microtubule cytoskeleton morphology d...abnormal microtubule cytoskeleton morphology d...CMPO_0000438http://www.ebi.ac.uk/cmpo/CMPO_0000438
5ash2idr0001-graml-sysgro/screenA (60)JL_120809_S14B1239834abnormal microtubule cytoskeleton morphology d...abnormal microtubule cytoskeleton morphology d...CMPO_0000438http://www.ebi.ac.uk/cmpo/CMPO_0000438
6ash2idr0001-graml-sysgro/screenA (60)JL_120809_S14B1239831abnormal microtubule cytoskeleton morphology d...abnormal microtubule cytoskeleton morphology d...CMPO_0000438http://www.ebi.ac.uk/cmpo/CMPO_0000438
7ash2idr0001-graml-sysgro/screenA (60)JL_120809_S14B1239835abnormal microtubule cytoskeleton morphology d...abnormal microtubule cytoskeleton morphology d...CMPO_0000438http://www.ebi.ac.uk/cmpo/CMPO_0000438
8ash2idr0001-graml-sysgro/screenA (60)JL_121215_J4_11269432abnormal microtubule cytoskeleton morphology d...abnormal microtubule cytoskeleton morphology d...CMPO_0000438http://www.ebi.ac.uk/cmpo/CMPO_0000438
9ash2idr0001-graml-sysgro/screenA (60)JL_121215_J4_11269435abnormal microtubule cytoskeleton morphology d...abnormal microtubule cytoskeleton morphology d...CMPO_0000438http://www.ebi.ac.uk/cmpo/CMPO_0000438
\n", "
" ], "text/plain": [ " Gene Screen Plate Image \\\n", "0 ASH2L idr0012-fuchs-cellmorph/screenA (2) HT28 1830141 \n", "1 ASH2L idr0012-fuchs-cellmorph/screenA (2) HT28 1830140 \n", "2 ash2 idr0001-graml-sysgro/screenA (60) JL_120809_S14B 1239832 \n", "3 ash2 idr0001-graml-sysgro/screenA (60) JL_120809_S14B 1239830 \n", "4 ash2 idr0001-graml-sysgro/screenA (60) JL_120809_S14B 1239833 \n", "5 ash2 idr0001-graml-sysgro/screenA (60) JL_120809_S14B 1239834 \n", "6 ash2 idr0001-graml-sysgro/screenA (60) JL_120809_S14B 1239831 \n", "7 ash2 idr0001-graml-sysgro/screenA (60) JL_120809_S14B 1239835 \n", "8 ash2 idr0001-graml-sysgro/screenA (60) JL_121215_J4_1 1269432 \n", "9 ash2 idr0001-graml-sysgro/screenA (60) JL_121215_J4_1 1269435 \n", "\n", " Phenotype \\\n", "0 elongated cells \n", "1 elongated cells \n", "2 abnormal microtubule cytoskeleton morphology d... \n", "3 abnormal microtubule cytoskeleton morphology d... \n", "4 abnormal microtubule cytoskeleton morphology d... \n", "5 abnormal microtubule cytoskeleton morphology d... \n", "6 abnormal microtubule cytoskeleton morphology d... \n", "7 abnormal microtubule cytoskeleton morphology d... \n", "8 abnormal microtubule cytoskeleton morphology d... \n", "9 abnormal microtubule cytoskeleton morphology d... \n", "\n", " Phenotype Term Name Phenotype Term Accession \\\n", "0 elongated cell phenotype CMPO_0000077 \n", "1 elongated cell phenotype CMPO_0000077 \n", "2 abnormal microtubule cytoskeleton morphology d... CMPO_0000438 \n", "3 abnormal microtubule cytoskeleton morphology d... CMPO_0000438 \n", "4 abnormal microtubule cytoskeleton morphology d... CMPO_0000438 \n", "5 abnormal microtubule cytoskeleton morphology d... CMPO_0000438 \n", "6 abnormal microtubule cytoskeleton morphology d... CMPO_0000438 \n", "7 abnormal microtubule cytoskeleton morphology d... CMPO_0000438 \n", "8 abnormal microtubule cytoskeleton morphology d... CMPO_0000438 \n", "9 abnormal microtubule cytoskeleton morphology d... 
CMPO_0000438 \n",
       "\n",
       " Phenotype Term Accession URL \n",
       "0 http://www.ebi.ac.uk/cmpo/CMPO_0000077 \n",
       "1 http://www.ebi.ac.uk/cmpo/CMPO_0000077 \n",
       "2 http://www.ebi.ac.uk/cmpo/CMPO_0000438 \n",
       "3 http://www.ebi.ac.uk/cmpo/CMPO_0000438 \n",
       "4 http://www.ebi.ac.uk/cmpo/CMPO_0000438 \n",
       "5 http://www.ebi.ac.uk/cmpo/CMPO_0000438 \n",
       "6 http://www.ebi.ac.uk/cmpo/CMPO_0000438 \n",
       "7 http://www.ebi.ac.uk/cmpo/CMPO_0000438 \n",
       "8 http://www.ebi.ac.uk/cmpo/CMPO_0000438 \n",
       "9 http://www.ebi.ac.uk/cmpo/CMPO_0000438 "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from tempfile import NamedTemporaryFile\n",
    "\n",
    "attr_type = \"gene\"\n",
    "attr_keys = {\n",
    "    \"phenotype\":\n",
    "        (\"Phenotype\",\n",
    "         \"Phenotype Term Name\",\n",
    "         \"Phenotype Term Accession\",\n",
    "         \"Phenotype Term Accession URL\")\n",
    "}\n",
    "\n",
    "# The first key names the phenotype itself; the remaining keys describe an\n",
    "# ontology term mapped to that phenotype.\n",
    "PHENOTYPE_KEY = attr_keys[\"phenotype\"][0]\n",
    "ONTOLOGY_KEYS = attr_keys[\"phenotype\"][1:]\n",
    "\n",
    "# Column order of the .csv file (and therefore of the DataFrame).\n",
    "fieldnames = ['Gene', 'Screen', 'Plate', 'Image'] + list(attr_keys[\"phenotype\"])\n",
    "\n",
    "\n",
    "def get_json(url):\n",
    "    \"\"\"Fetch `url` with the notebook's HTTP session and return the decoded JSON.\n",
    "\n",
    "    Raises requests.HTTPError for a 4xx/5xx answer instead of trying to\n",
    "    decode an error page as JSON.\n",
    "    \"\"\"\n",
    "    response = session.get(url)\n",
    "    response.raise_for_status()\n",
    "    return response.json()\n",
    "\n",
    "\n",
    "def phenotype_rows(annotation_values):\n",
    "    \"\"\"Extract the phenotype columns from the key/value pairs of one annotation.\n",
    "\n",
    "    Returns a list of dicts keyed by the phenotype column names:\n",
    "    an empty list when there is no 'Phenotype' key, a single dict holding\n",
    "    only the phenotype when it has no ontology mapping, otherwise one dict\n",
    "    per ontology mapping.\n",
    "    \"\"\"\n",
    "    has_phenotype = False\n",
    "    phenotype = None\n",
    "    mappings = []  # one dict of ontology columns per mapped term\n",
    "    current = {}\n",
    "    for pair in annotation_values:\n",
    "        key, value = str(pair[0]), pair[1]\n",
    "        if key == PHENOTYPE_KEY:\n",
    "            has_phenotype = True\n",
    "            phenotype = value\n",
    "        elif key in ONTOLOGY_KEYS:\n",
    "            # A key that is already filled in means the next mapping starts.\n",
    "            # Counting pairs in fixed groups of three would silently drop a\n",
    "            # mapping that lacks one of the three ontology keys.\n",
    "            if key in current:\n",
    "                mappings.append(current)\n",
    "                current = {}\n",
    "            current[key] = str(value)\n",
    "    if current:\n",
    "        mappings.append(current)\n",
    "\n",
    "    if not has_phenotype:\n",
    "        return []\n",
    "    rows = []\n",
    "    for mapping in mappings or [{}]:  # [{}] -> phenotype without any mapping\n",
    "        row = {PHENOTYPE_KEY: phenotype}\n",
    "        row.update(mapping)\n",
    "        rows.append(row)\n",
    "    return rows\n",
    "\n",
    "\n",
    "def iter_phenotype_rows(gene):\n",
    "    \"\"\"Yield one .csv row (dict) per phenotype / ontology mapping found for `gene`.\n",
    "\n",
    "    Queries, in turn, the screens for the gene, the plates of each screen,\n",
    "    the images of each plate and the annotations of each image.\n",
    "    \"\"\"\n",
    "    screens_url = SCREENS_PROJECTS_URL.format(key=attr_type, value=gene)\n",
    "    for screen in get_json(screens_url)['screens']:\n",
    "        plates_url = PLATES_URL.format(\n",
    "            key=attr_type, value=gene, screen_id=screen['id'])\n",
    "        for plate in get_json(plates_url)['plates']:\n",
    "            images_url = IMAGES_URL.format(\n",
    "                key=attr_type, value=gene,\n",
    "                parent_type='plate', parent_id=plate['id'])\n",
    "            for image in get_json(images_url)['images']:\n",
    "                annotations_url = ATTRIBUTES_URL.format(image_id=image['id'])\n",
    "                for annotation in get_json(annotations_url)['annotations']:\n",
    "                    for columns in phenotype_rows(annotation['values']):\n",
    "                        row = {'Gene': gene,\n",
    "                               'Screen': screen['name'],\n",
    "                               'Plate': plate['name'],\n",
    "                               'Image': image['id']}\n",
    "                        row.update(columns)\n",
    "                        yield row\n",
    "\n",
    "\n",
    "csvfile = NamedTemporaryFile(\"w\")\n",
    "try:\n",
    "    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
    "    writer.writeheader()\n",
    "    for gene in genes:\n",
    "        for row in iter_phenotype_rows(gene):\n",
    "            writer.writerow(row)\n",
    "\n",
    "    # The file object is buffered and read_csv reopens the file by name, so\n",
    "    # flush first; otherwise the last rows written may not be read back.\n",
    "    csvfile.flush()\n",
    "    df = pd.read_csv(csvfile.name)\n",
    "finally:\n",
    "    # closing the temporary file also deletes it\n",
    "    csvfile.close()\n",
    "\n",
    "# view what was in the csv file (displaying the first 10 rows alone)\n",
    "df.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "License\n",
    "\n",
    "Copyright (C) 2017 University of Dundee. 
All Rights Reserved.\n", "\n", "This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.\n", "\n", "This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.\n" ] } ], "metadata": { "kernelspec": { "display_name": "OMERO Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.14" } }, "nbformat": 4, "nbformat_minor": 1 }