In [1]:
import requests
 
# Base URL of the Ensembl REST service (no trailing slash; do_request adds the
# separators). The HTTPS endpoint is used so requests are not sent in cleartext.
ensembl_server = 'https://rest.ensembl.org'

def do_request(server, service, *args, **kwargs):
    """Send a GET request to a REST service and return the decoded JSON body.

    The request URL is built as ``server/service[/arg1[/arg2...]]``.

    Parameters
    ----------
    server : str
        Base URL of the server, without a trailing slash.
    service : str
        Endpoint path relative to ``server`` (e.g. ``'lookup/symbol'``).
    *args
        Extra path segments, appended in order. ``None`` entries are skipped;
        any other value is formatted with ``%s``, so non-string identifiers
        (e.g. an integer taxon ID) can be passed directly.
    **kwargs
        Passed to ``requests.get`` as ``params`` (the URL query string).

    Returns
    -------
    The response body as parsed by ``Response.json()``.

    Raises
    ------
    requests.HTTPError
        Raised by ``Response.raise_for_status()`` for an error status.
    """
    # '%s' formatting (rather than '/' + a) accepts non-string segments and,
    # under Python 2, still yields unicode when a segment is unicode.
    url_params = ''.join('/%s' % (a,) for a in args if a is not None)
    req = requests.get('%s/%s%s' % (server, service, url_params),
                       params=kwargs,
                       headers={'Content-Type': 'application/json'})

    # raise_for_status() does nothing for a successful response, so a
    # separate check of req.ok beforehand is unnecessary.
    req.raise_for_status()
    return req.json()

In [2]:
# Retrieve the species catalogue from the server and print one name per line.
species_listing = do_request(ensembl_server, 'info/species')
for species in species_listing['species']:
    print(species['name'])

saccharomyces_cerevisiae
ciona_savignyi
myotis_lucifugus
taeniopygia_guttata
sorex_araneus
otolemur_garnettii
macropus_eugenii
erinaceus_europaeus
anolis_carolinensis
gadus_morhua
dasypus_novemcinctus
chlorocebus_sabaeus
tursiops_truncatus
mus_musculus
bos_taurus
monodelphis_domestica
choloepus_hoffmanni
sus_scrofa
rattus_norvegicus
caenorhabditis_elegans
pteropus_vampyrus
microcebus_murinus
sarcophilus_harrisii
ovis_aries
papio_anubis
pelodiscus_sinensis
equus_caballus
xiphophorus_maculatus
macaca_mulatta
astyanax_mexicanus
latimeria_chalumnae
ficedula_albicollis
gasterosteus_aculeatus
gorilla_gorilla
oryctolagus_cuniculus
oreochromis_niloticus
echinops_telfairi
nomascus_leucogenys
homo_sapiens
dipodomys_ordii
lepisosteus_oculatus
anas_platyrhynchos
canis_familiaris
callithrix_jacchus
pongo_abelii
ornithorhynchus_anatinus
tetraodon_nigroviridis
mustela_putorius_furo
tarsius_syrichta
vicugna_pacos
meleagris_gallopavo
xenopus_tropicalis
ictidomys_tridecemlineatus
cavia_porcellus
takifug

In [3]:
# List the external databases known for human, sending filter='HGNC%' as a
# query parameter (the recorded output only contains names starting with HGNC).
ext_dbs = do_request(
    ensembl_server,
    'info/external_dbs',
    'homo_sapiens',
    filter='HGNC%'
)
print(ext_dbs)

[{u'release': u'1', u'display_name': u'HGNC Symbol', u'name': u'HGNC', u'description': None}, {u'release': u'1', u'display_name': u'HGNC (automatic)', u'name': u'HGNC_automatic_gene', u'description': None}, {u'release': u'1', u'display_name': u'HGNC (automatic)', u'name': u'HGNC_automatic_transcript', u'description': None}, {u'release': u'1', u'display_name': u'HGNC (curated)', u'name': u'HGNC_curated_gene', u'description': None}, {u'release': u'1', u'display_name': u'HGNC (curated)', u'name': u'HGNC_curated_transcript', u'description': None}, {u'release': u'1', u'display_name': u'HGNC transcript name', u'name': u'HGNC_trans_name', u'description': u'transcript name from HGNC'}]


In [4]:
# Look up the human LCT gene by its symbol, show the full record, and keep its
# stable ID because the following cells query by that ID.
lct_gene = do_request(ensembl_server, 'lookup/symbol', 'homo_sapiens', 'LCT')
print(lct_gene)
lct_id = lct_gene['id']

{u'assembly_name': u'GRCh38', u'display_name': u'LCT', u'description': u'lactase [Source:HGNC Symbol;Acc:HGNC:6530]', u'seq_region_name': u'2', u'logic_name': u'ensembl_havana_gene', u'object_type': u'Gene', u'start': 135787840, u'id': u'ENSG00000115850', u'source': u'ensembl_havana', u'db_type': u'core', u'biotype': u'protein_coding', u'end': 135837180, u'species': u'homo_sapiens', u'strand': -1}


In [5]:
# Download the sequence record for the LCT gene via its stable ID.
lct_seq = do_request(
    ensembl_server,
    'sequence/id',
    lct_id
)
print(lct_seq)

{u'molecule': u'dna', u'id': u'ENSG00000115850', u'seq': u'GTTCCTAGAAAATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATGCTGGGGGTCAGACTGGGAGTCTGATAGAAATTTCATTTCCACCGCTGGTCCTCTAACCAATGACTTGCTGCACAACCTGAGTGGTCTCCTGGGAGACCAGAGTTCTAACTTTGTAGCAGGGGACAAAGACATGTATGTTTGTCACCAGCCACTGCCCACTTTCCTGCCAGAATACTTCAGCAGTCTCCATGCCAGTCAGATCACCCATTATAAGGTATTTCTGTCATGGGCACAGCTCCTCCCAGCAGGAAGCACCCAGAATCCAGACGAGAAAACAGTGCAGTGCTACCGGCGACTCCTCAAGGCCCTCAAGACTGCACGGCTTCAGCCCATGGTCATCCTGCACCACCAGACCCTCCCTGCCAGCACCCTCCGGAGAACCGAAGCCTTTGCTGACCTCTTCGCCGACTATGCCACATTCGCCTTCCACTCCTTCGGGGACCTAGTTGGGATCTGGTTCACCTTCAGTGACTTGGAGGAAGTGATCAAGGAGCTTCCCCACCAGGAATCAAGAGCGTCACAACTCCAGACCCTCAGTGATGCCCACAGAAAAGCCTATGAGATTTACCACGAAAGCTATGCTTTTCAGGGTGAGTACACATTGACCTGATGGTGACCCCTCGGCAACCTTCATCACACACCTTCCCCATCCTCCTTAGAGCAGATTCGACATTTCTCCCAACTCACCTTCAGCAGTCCTCTTATGTCTGTGCATAGGGAGAAATTAATATTGTAAATTGATTTCCCACTGGCGATAGGAAGGGGTAGCTAACATGGCAAAACACTCAGCATTTCCTTTGAAAAATATCTTTGAGGCTCACGCCTGTAATCCTAGCACTTTGGGAGGCCGAGGTGGGCGGATCACTTGAAGTCAGGAGTT

In [6]:
# Fetch the cross-references attached to the LCT gene; for each one print the
# display name of its source database followed by the complete record.
lct_xrefs = do_request(ensembl_server, 'xrefs/id', lct_id)
for cross_ref in lct_xrefs:
    db_label = cross_ref['db_display_name']
    print(db_label)
    print(cross_ref)

Vega gene
{u'display_id': u'OTTHUMG00000131738', u'description': None, u'db_display_name': u'Vega gene', u'info_text': u'Added during ensembl-vega production', u'info_type': u'NONE', u'primary_id': u'OTTHUMG00000131738', u'synonyms': [], u'version': u'2', u'dbname': u'Vega_gene'}
Vega gene
{u'display_id': u'LCT', u'description': None, u'db_display_name': u'Vega gene', u'info_text': u'', u'info_type': u'NONE', u'primary_id': u'OTTHUMG00000131738', u'synonyms': [], u'version': u'1', u'dbname': u'Vega_gene'}
Havana gene
{u'display_id': u'OTTHUMG00000131738', u'description': None, u'db_display_name': u'Havana gene', u'info_text': u'', u'info_type': u'NONE', u'primary_id': u'OTTHUMG00000131738', u'synonyms': [], u'version': u'0', u'dbname': u'OTTG'}
Expression Atlas
{u'display_id': u'ENSG00000115850', u'description': u'', u'db_display_name': u'Expression Atlas', u'info_text': u'', u'info_type': u'DIRECT', u'primary_id': u'ENSG00000115850', u'synonyms': [], u'version': u'0', u'dbname': u'Arr

In [7]:
# Same cross-reference endpoint as before, this time with the query parameters
# external_db='GO' and all_levels='1'.
refs = do_request(
    ensembl_server,
    'xrefs/id',
    lct_id,
    external_db='GO',
    all_levels='1'
)
# NOTE: the recorded output is a tuple repr, i.e. it was produced under
# Python 2; under Python 3 the two values print separated by a space.
print(lct_id, refs)

(u'ENSG00000115850', [{u'display_id': u'GO:0000016', u'description': u'lactase activity', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'TAS'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0000016', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0004553', u'description': u'hydrolase activity, hydrolyzing O-glycosyl compounds', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'IEA'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0004553', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0005886', u'description': u'plasma membrane', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'TAS'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0005886', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0005887', u'description': u'integral component of plasma membrane', u'db_display_name': u'GO', u'info_text

In [8]:
# Request the orthologues of the LCT gene (type='orthologues', sequence='none')
# and walk through them, printing each target species. For the horse entry the
# full homology record and its taxonomy level are printed as well, and the
# horse gene ID is remembered for the next cell.
hom_response = do_request(ensembl_server, 'homology/id', lct_id, type='orthologues', sequence='none')
homologies = hom_response['data'][0]['homologies']

# Reset on every run so a value left in the kernel by an earlier execution can
# never be mistaken for a match from this response.
horse_id = None
for homology in homologies:
    target = homology['target']
    print(target['species'])
    if target['species'] != 'equus_caballus':
        continue
    print(homology)
    print(homology['taxonomy_level'])
    horse_id = target['id']

# Fail here with a clear message instead of later with an undefined or None ID.
assert horse_id is not None, 'no equus_caballus orthologue found for %s' % lct_id

pan_troglodytes
gorilla_gorilla
pongo_abelii
nomascus_leucogenys
papio_anubis
macaca_mulatta
chlorocebus_sabaeus
callithrix_jacchus
tarsius_syrichta
otolemur_garnettii
microcebus_murinus
ictidomys_tridecemlineatus
rattus_norvegicus
oryctolagus_cuniculus
oryctolagus_cuniculus
ochotona_princeps
dipodomys_ordii
cavia_porcellus
mus_musculus
oryctolagus_cuniculus
myotis_lucifugus
ailuropoda_melanoleuca
felis_catus
canis_familiaris
sus_scrofa
sorex_araneus
ovis_aries
bos_taurus
tursiops_truncatus
sus_scrofa
pteropus_vampyrus
mustela_putorius_furo
erinaceus_europaeus
vicugna_pacos
equus_caballus
{u'taxonomy_level': u'Boreoeutheria', u'target': {u'perc_pos': 92, u'id': u'ENSECAG00000018594', u'protein_id': u'ENSECAP00000016483', u'perc_id': 86, u'cigar_line': u'329MD258MD1264MD76M', u'taxon_id': 9796, u'species': u'equus_caballus'}, u'source': {u'perc_pos': 92, u'id': u'ENSG00000115850', u'protein_id': u'ENSP00000264162', u'perc_id': 86, u'cigar_line': u'351M2D1503MD73M', u'taxon_id': 9606, u'

In [9]:
# Look up the gene record for the horse orthologue whose ID was stored in
# horse_id by the homology cell.
horse_req = do_request(
    ensembl_server,
    'lookup/id',
    horse_id
)
print(horse_req)

{u'assembly_name': u'EquCab2', u'display_name': u'LCT', u'description': u'lactase [Source:HGNC Symbol;Acc:HGNC:6530]', u'seq_region_name': u'18', u'logic_name': u'ensembl', u'object_type': u'Gene', u'start': 19610968, u'id': u'ENSECAG00000018594', u'source': u'ensembl', u'db_type': u'core', u'biotype': u'protein_coding', u'end': 19657160, u'species': u'equus_caballus', u'strand': -1}


In [10]:
# TODO: maybe look at the synteny of MCM6 and LCT in horse (equus_caballus) and gorilla.