# Get phenotypes associated with a list of genes from high content screens

This notebook takes a list of gene symbols and queries the IDR for phenotypes associated with the genes in high content screens.

In [1]:
import json
import csv
import pandas as pd

### Set up the server to query and the HTTP session

In [2]:
import requests

INDEX_PAGE = "http://idr.openmicroscopy.org/webclient/?experimenter=-1"

# Create the HTTP session that the later cells reuse for every query.
# It is deliberately NOT created in a ``with`` block: leaving a ``with``
# block closes the session, yet the cells below still call ``session.get``.
session = requests.Session()

# Load the index page once through the session and fail early, with a clear
# HTTP error, if the server cannot be reached.
# NOTE(review): presumably this first request also sets up the cookies the
# API calls rely on - confirm against the IDR API documentation.
response = session.get(INDEX_PAGE)
response.raise_for_status()

### Get the list of genes to query, either from a file (one gene symbol per line) or by entering them directly in the list below

In [3]:
# To load the gene list from a file instead, uncomment the two lines below:
# with open('./includes/FiveExampleGenes.txt') as f:
#    genes = f.read().splitlines()

# Genes entered directly in the notebook; comment this assignment out when
# the list has been read from a file.
genes = ["ASH2L", "ash2", "85441"]

# Show (at most) the first five entries to confirm the list is populated.
genes[:5]

['ASH2L', 'ash2', '85441']

### Set up base URLs so that shorter variable names can be used later on

In [4]:
# URL templates for the queries made later in the notebook. Each one is
# completed with ``str.format`` using the placeholders shown in braces.

# Containers (the notebook reads the 'screens' entry) annotated with a value.
SCREENS_PROJECTS_URL = (
    "http://idr.openmicroscopy.org/mapr/api/{key}/"
    "?value={value}"
)
# Plates of one screen annotated with a value.
PLATES_URL = (
    "http://idr.openmicroscopy.org/mapr/api/{key}/plates/"
    "?value={value}&id={screen_id}"
)
# Images under one parent node (e.g. a plate) annotated with a value.
IMAGES_URL = (
    "http://idr.openmicroscopy.org/mapr/api/{key}/images/"
    "?value={value}&node={parent_type}&id={parent_id}"
)
# Map annotations attached to a single image.
ATTRIBUTES_URL = (
    "http://idr.openmicroscopy.org/webclient/api/annotations/"
    "?type=map&image={image_id}"
)

### For each gene in the list, fetch the screens, plates and images, and then the phenotypes associated with the images

The results are written to a temporary .csv file, which is then loaded into a pandas DataFrame; the first 10 rows are displayed below.

In [6]:
# Kind of map annotation that the values in the gene list are looked up by.
attr_type = "gene"

# Annotation keys of interest, grouped by category.
attr_keys = dict(
    phenotype=(
        "Phenotype",
        "Phenotype Term Name",
        "Phenotype Term Accession",
        "Phenotype Term Accession URL",
    ),
)

from tempfile import NamedTemporaryFile

# Annotation keys that together describe one ontology mapping of a phenotype.
ONTOLOGY_KEYS = ('Phenotype Term Name',
                 'Phenotype Term Accession',
                 'Phenotype Term Accession URL')


def _get_json(url):
    """Fetch ``url`` with the shared ``session`` and return the decoded JSON.

    Raises an HTTP error (via ``raise_for_status``) for a 4xx/5xx response
    instead of failing later with a less helpful JSON or key error.
    """
    response = session.get(url)
    response.raise_for_status()
    return response.json()


def _phenotype_rows(values):
    """Turn the key/value pairs of one map annotation into phenotype rows.

    ``values`` is the annotation's list of ``[key, value]`` pairs. Only keys
    listed in ``attr_keys['phenotype']`` are considered.

    Returns a list of dicts:
    * an empty list when the annotation has no 'Phenotype' key;
    * one row holding only 'Phenotype' when there is no ontology mapping;
    * otherwise one row per ontology mapping, each with 'Phenotype' plus
      whichever ontology keys that mapping provides.

    A new mapping starts whenever an ontology key repeats, so a mapping that
    lacks one of the three keys still yields a row (with the missing column
    left empty) rather than being dropped or shifted into the next mapping.
    """
    phenotype = None
    mappings = []  # one dict per ontology mapping, in annotation order

    for pair in values:
        key = str(pair[0])
        if key not in attr_keys['phenotype']:
            continue
        if key == 'Phenotype':
            phenotype = pair[1]
        elif key in ONTOLOGY_KEYS:
            if not mappings or key in mappings[-1]:
                mappings.append({})
            mappings[-1][key] = str(pair[1])

    if phenotype is None:
        return []
    if not mappings:
        return [{'Phenotype': phenotype}]
    return [dict(mapping, Phenotype=phenotype) for mapping in mappings]


fieldnames = [
    'Gene', 'Screen', 'Plate', 'Image',
    'Phenotype', 'Phenotype Term Name', 'Phenotype Term Accession',
    'Phenotype Term Accession URL']

csvfile = NamedTemporaryFile("w")
try:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # Walk gene -> screens -> plates -> images -> map annotations.
    for gene in genes:
        screens_url = SCREENS_PROJECTS_URL.format(key=attr_type, value=gene)

        for screen in _get_json(screens_url)['screens']:
            plates_url = PLATES_URL.format(
                key=attr_type, value=gene, screen_id=screen['id'])

            for plate in _get_json(plates_url)['plates']:
                images_url = IMAGES_URL.format(
                    key=attr_type, value=gene,
                    parent_type='plate', parent_id=plate['id'])

                for image in _get_json(images_url)['images']:
                    annotations_url = ATTRIBUTES_URL.format(
                        image_id=image['id'])
                    # columns shared by every row written for this image
                    context = {'Gene': gene,
                               'Screen': screen['name'],
                               'Plate': plate['name'],
                               'Image': image['id']}

                    for annotation in _get_json(annotations_url)['annotations']:
                        for row in _phenotype_rows(annotation['values']):
                            row.update(context)
                            writer.writerow(row)

    # The file is re-read by name below, so push any buffered rows to disk
    # first; otherwise the DataFrame can be truncated or empty.
    csvfile.flush()
    df = pd.read_csv(csvfile.name)
finally:
    # closing the temporary file also deletes it
    csvfile.close()

# view what is in the csv file (displaying the first 10 rows alone)
df.head(10)

Unnamed: 0,Gene,Screen,Plate,Image,Phenotype,Phenotype Term Name,Phenotype Term Accession,Phenotype Term Accession URL
0,ASH2L,idr0012-fuchs-cellmorph/screenA (2),HT28,1830141,elongated cells,elongated cell phenotype,CMPO_0000077,http://www.ebi.ac.uk/cmpo/CMPO_0000077
1,ASH2L,idr0012-fuchs-cellmorph/screenA (2),HT28,1830140,elongated cells,elongated cell phenotype,CMPO_0000077,http://www.ebi.ac.uk/cmpo/CMPO_0000077
2,ash2,idr0001-graml-sysgro/screenA (60),JL_120809_S14B,1239832,abnormal microtubule cytoskeleton morphology d...,abnormal microtubule cytoskeleton morphology d...,CMPO_0000438,http://www.ebi.ac.uk/cmpo/CMPO_0000438
3,ash2,idr0001-graml-sysgro/screenA (60),JL_120809_S14B,1239830,abnormal microtubule cytoskeleton morphology d...,abnormal microtubule cytoskeleton morphology d...,CMPO_0000438,http://www.ebi.ac.uk/cmpo/CMPO_0000438
4,ash2,idr0001-graml-sysgro/screenA (60),JL_120809_S14B,1239833,abnormal microtubule cytoskeleton morphology d...,abnormal microtubule cytoskeleton morphology d...,CMPO_0000438,http://www.ebi.ac.uk/cmpo/CMPO_0000438
5,ash2,idr0001-graml-sysgro/screenA (60),JL_120809_S14B,1239834,abnormal microtubule cytoskeleton morphology d...,abnormal microtubule cytoskeleton morphology d...,CMPO_0000438,http://www.ebi.ac.uk/cmpo/CMPO_0000438
6,ash2,idr0001-graml-sysgro/screenA (60),JL_120809_S14B,1239831,abnormal microtubule cytoskeleton morphology d...,abnormal microtubule cytoskeleton morphology d...,CMPO_0000438,http://www.ebi.ac.uk/cmpo/CMPO_0000438
7,ash2,idr0001-graml-sysgro/screenA (60),JL_120809_S14B,1239835,abnormal microtubule cytoskeleton morphology d...,abnormal microtubule cytoskeleton morphology d...,CMPO_0000438,http://www.ebi.ac.uk/cmpo/CMPO_0000438
8,ash2,idr0001-graml-sysgro/screenA (60),JL_121215_J4_1,1269432,abnormal microtubule cytoskeleton morphology d...,abnormal microtubule cytoskeleton morphology d...,CMPO_0000438,http://www.ebi.ac.uk/cmpo/CMPO_0000438
9,ash2,idr0001-graml-sysgro/screenA (60),JL_121215_J4_1,1269435,abnormal microtubule cytoskeleton morphology d...,abnormal microtubule cytoskeleton morphology d...,CMPO_0000438,http://www.ebi.ac.uk/cmpo/CMPO_0000438


License

Copyright (C) 2017 University of Dundee. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
