{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import requests\n", " \n", "ensembl_server = 'http://rest.ensembl.org'\n", "\n", "def do_request(server, service, *args, **kwargs):\n", " url_params = ''\n", " for a in args:\n", " if a is not None:\n", " url_params += '/' + a\n", " req = requests.get('%s/%s%s' % (server, service, url_params),\n", " params=kwargs,\n", " headers={'Content-Type': 'application/json'})\n", " \n", " if not req.ok:\n", " req.raise_for_status()\n", " return req.json()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "saccharomyces_cerevisiae\n", "ciona_savignyi\n", "myotis_lucifugus\n", "taeniopygia_guttata\n", "sorex_araneus\n", "otolemur_garnettii\n", "macropus_eugenii\n", "erinaceus_europaeus\n", "anolis_carolinensis\n", "gadus_morhua\n", "dasypus_novemcinctus\n", "chlorocebus_sabaeus\n", "tursiops_truncatus\n", "mus_musculus\n", "bos_taurus\n", "monodelphis_domestica\n", "choloepus_hoffmanni\n", "sus_scrofa\n", "rattus_norvegicus\n", "caenorhabditis_elegans\n", "pteropus_vampyrus\n", "microcebus_murinus\n", "sarcophilus_harrisii\n", "ovis_aries\n", "papio_anubis\n", "pelodiscus_sinensis\n", "equus_caballus\n", "xiphophorus_maculatus\n", "macaca_mulatta\n", "astyanax_mexicanus\n", "latimeria_chalumnae\n", "ficedula_albicollis\n", "gasterosteus_aculeatus\n", "gorilla_gorilla\n", "oryctolagus_cuniculus\n", "oreochromis_niloticus\n", "echinops_telfairi\n", "nomascus_leucogenys\n", "homo_sapiens\n", "dipodomys_ordii\n", "lepisosteus_oculatus\n", "anas_platyrhynchos\n", "canis_familiaris\n", "callithrix_jacchus\n", "pongo_abelii\n", "ornithorhynchus_anatinus\n", "tetraodon_nigroviridis\n", "mustela_putorius_furo\n", "tarsius_syrichta\n", "vicugna_pacos\n", "meleagris_gallopavo\n", "xenopus_tropicalis\n", "ictidomys_tridecemlineatus\n", "cavia_porcellus\n", "takifugu_rubripes\n", "ochotona_princeps\n", "pan_troglodytes\n", "petromyzon_marinus\n", "ailuropoda_melanoleuca\n", "felis_catus\n", "procavia_capensis\n", "oryzias_latipes\n", "danio_rerio\n", "gallus_gallus\n", "tupaia_belangeri\n", "ciona_intestinalis\n", "loxodonta_africana\n", "poecilia_formosa\n", "drosophila_melanogaster\n", "Ancestral sequences\n" ] } ], "source": [ "answer = do_request(ensembl_server, 'info/species')\n", "for sp in answer['species']:\n", " print(sp['name'])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{u'release': u'1', u'display_name': u'HGNC Symbol', u'name': u'HGNC', u'description': None}, {u'release': u'1', u'display_name': u'HGNC (automatic)', u'name': u'HGNC_automatic_gene', u'description': None}, {u'release': u'1', u'display_name': u'HGNC (automatic)', u'name': u'HGNC_automatic_transcript', u'description': None}, {u'release': u'1', u'display_name': u'HGNC (curated)', u'name': u'HGNC_curated_gene', u'description': None}, {u'release': u'1', u'display_name': u'HGNC (curated)', u'name': u'HGNC_curated_transcript', u'description': None}, {u'release': u'1', u'display_name': u'HGNC transcript name', u'name': u'HGNC_trans_name', u'description': u'transcript name from HGNC'}]\n" ] } ], "source": [ "ext_dbs = do_request(ensembl_server, 'info/external_dbs', 'homo_sapiens', filter='HGNC%')\n", "print(ext_dbs)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{u'assembly_name': u'GRCh38', u'display_name': u'LCT', u'description': u'lactase [Source:HGNC Symbol;Acc:HGNC:6530]', u'seq_region_name': u'2', u'logic_name': u'ensembl_havana_gene', u'object_type': u'Gene', u'start': 135787840, u'id': u'ENSG00000115850', u'source': u'ensembl_havana', u'db_type': u'core', u'biotype': u'protein_coding', u'end': 135837180, u'species': u'homo_sapiens', u'strand': -1}\n" ] } ], "source": [ "answer = do_request(ensembl_server, 'lookup/symbol', 'homo_sapiens', 'LCT')\n", "print(answer)\n", "lct_id = answer['id']" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{u'molecule': u'dna', u'id': u'ENSG00000115850', u'seq': u'GTTCCTAGAAAATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATGCTGGGGGTCAGACTGGGAGTCTGATAGAAATTTCATTTCCACCGCTGGTCCTCTAACCAATGACTTGCTGCACAACCTGAGTGGTCTCCTGGGAGACCAGAGTTCTAACTTTGTAGCAGGGGACAAAGACATGTATGTTTGTCACCAGCCACTGCCCACTTTCCTGCCAGAATACTTCAGCAGTCTCCATGCCAGTCAGATCACCCATTATAAGGTATTTCTGTCATGGGCACAGCTCCTCCCAGCAGGAAGCACCCAGAATCCAGACGAGAAAACAGTGCAGTGCTACCGGCGACTCCTCAAGGCCCTCAAGACTGCACGGCTTCAGCCCATGGTCATCCTGCACCACCAGACCCTCCCTGCCAGCACCCTCCGGAGAACCGAAGCCTTTGCTGACCTCTTCGCCGACTATGCCACATTCGCCTTCCACTCCTTCGGGGACCTAGTTGGGATCTGGTTCACCTTCAGTGACTTGGAGGAAGTGATCAAGGAGCTTCCCCACCAGGAATCAAGAGCGTCACAACTCCAGACCCTCAGTGATGCCCACAGAAAAGCCTATGAGATTTACCACGAAAGCTATGCTTTTCAGGGTGAGTACACATTGACCTGATGGTGACCCCTCGGCAACCTTCATCACACACCTTCCCCATCCTCCTTAGAGCAGATTCGACATTTCTCCCAACTCACCTTCAGCAGTCCTCTTATGTCTGTGCATAGGGAGAAATTAATATTGTAAATTGATTTCCCACTGGCGATAGGAAGGGGTAGCTAACATGGCAAAACACTCAGCATTTCCTTTGAAAAATATCTTTGAGGCTCACGCCTGTAATCCTAGCACTTTGGGAGGCCGAGGTGGGCGGATCACTTGAAGTCAGGAGTTCGAGACCAGCCTGGCCAATATGGCAAAACCCCGTCTCTACTAAAAATACCAAAATTAGCCAGGGATGGTAGCAGGCGCCTGTGATCCCAGCTACTCGGGAGGCTTAGGCAGGAGAATTGCTTGAACCCAGGAGGCAGATGTTGCAGTGAGCCGAAATCATGCCACTGTACTACAGCCTGTGCGACAGAGCAAGACACAATCTCAAAAAAAAAAAATATGTATACATATATATATATATATATATATATATATATATATATATACACACATGTATTTTTGAAATACCCCTTGCCCAGTCTTGAGTTGTGCAAGCACTGATTGTCTTTCTGTCTTCCTTTCCGTCTATGTCCTGCTACCCTGACTTGGAGGCAAAGTTTCGTGGTTTCCTGTCCTTCCCCCTCGGCTCCCCTATACTTACATGTCTTAGTTGGAGCTTACATTGCAGCTTGGGAGACAGACAAATACAAGATGAAACAATTATATATCACAGGATGAGGAGAGCACAACTGTAACTGTTCACATTCCTGCCTCCTAGGGGACTTGAAGTTCTGATCTGTAGGCCTAGGCTGAACCAGGACATTTGTGATTATAATCAATTCCCTAAGTGTATGTTAGGTACACCAATTTTGAGAATCAATTTTGGATTATATATGCTTTCTTGTTTTTATTATATACCATATACATACATATTTGTATATATAGTACCTGATATATATATATATATATATATATATATATATATATATATATATATATACTTCTATGTTCTACTTTTTCATTTTAACATAGTTTATGACTATATTCTCATTTTATTAAAAACTTGAGCCAGGCATGGTGGCTCATGCCTATAATCCCAGCACGTTAGGAGGCCAAGGCAGGGGGATTGCTTGAGGCCAGGAGTTGGAGACCAGCCTGGGCAACATAGAGAGCCACTATCTCTAAAATAATATAATAATTTACTCCAGTGCTGGAATAGTATTGCTTAAAACAAAAGAAAAACTTGAAAGCATAACTTTCAATGGTCTTTTAGGTTCCCATTGTATAAATGGAGCATAAAGTTTAAGGTATTTTCCTTATTTTTGTATGTTTATTCTAATTTTCACTAGTTACAAGTTATGTTGCAATGACATTCCTATCTATAAACTATTTCCCACCTTCTGATAAATTCTTTCTGACAGAATCTTAGAATTGAAACTACTAGGTGAAAAGATCTGCATTCTTTAAGATTTTAAAGCATGGTACTAAATTATTTTCCAGAAAAGTTGTAATCATTTATACGAACAGTTATAAAATTACTCTCTGACCCCATTCTGTCAGTTTCAAATGCTATATTTTATTACTAATTTGACAGCTGAAAACAGCTTATTTTCATGTCTTGTTAATTTGCATGTATTTGATTATAGAGACGTTAAACATTTTTTTCTTCTTCTTTTTTTTTTTTTTTTTTTTTTTGAGATGGAGTCTCACTCTTTTGTCCAGGCTGGAGTGCAGTGGTGTGATCTCGGCTCACTGCAACCTCTGCCTCCCGAGTTCAAGCAATTCTCCTGCCTCAGCCTTCCCAGTAGCTGGGATTACAGGCACGCACCACCACATCTGGCTAATTTTTGTATTTTTAGTAGAGATGGGGTTTCACCGTGTTGGCCAGGCTGGACTCAAACTTCTGATCTCAGGTGATCCGCCCGCCTCGGACTCCCAAAATGCTGGGATTACAGGCGTGAGCCACCACGCCCGGCCTTAAACAAAAAAATAAAATAAGGCCAGGCGTGGTGGTTCACGCTTGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGCGGGTCACAAGGTCAGAAGATCGAGACCATCCTGGCTAACACGGTGAAACCCCATCTCTACTAAAAATACAAAAAATTAGCGAGGCGTGGTAGCACGTGTCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAACCTGGGAGGCGGAGGTTGCAGTGAGCCGAGACTGCGCCATTGCACTCCAGCCTGGGCGACAGAGCGAGACTCTGTCTCAAAAATAAATGAAAAATAAACTAGGACATCATAGCTGCCTTATTCACAATAGCCAAAAGGTGGAAGCAACCCAGATATCCACTGATGGATGAATGGATAAACGAGACGTGGCATAGACATAAAATGGAATATTACTCCACATTTATTAAAAAGGGAAGAAATCATCACATGCTACAACATGGATGAACCTTGAGGACATTATGCTATGTGAAATAAGCCAGACACAAAAAGACAAACGCTGTATGATTCCATGTATAGGAGGAAACTAGAGCAGTCGAATTCATGGAGAGAGGAAGGAGAATGGCAGAGGCCAGGGGCTGGGGTAGGGGAGAAATGGAGAGCTCTTTAATGGGAGTAGAATTTCTGTTCAGAAAAATGAAAAAGTTCTGGAGATTGGTTGCACAACAATGTGAATATACTTAATAATACTGAACTGTGCACTTAAAAATAGTTAAGATGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAACACTTTGGGAGGCCAAGGCGGGCAGATCATGAGGTCAGGAGATCGAGACCATCCTGGCTAACCCGGTGCAACTCCGTCTCTACCAAAAATGCAAAAAATTAGCCGGGCGTGGTGGCGGGCCCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACGCGGGAGGCGGAGCTTTCAGTGAGCCGAGATGGCACGACTGCACTCCAGCCTGGGCGACGGAGCGAGACTCCGTCTCAAAAAAAAAAAAAAAAAAAAAAAGAAGTTTAGGAAGGAGTTCTCTCCTTGCCAACTCTCCAACTGCTCTGCATTTCATAGTCCTGATTTTCACAAAACACTCTCTAAAGGAACATTATTTTTTTAAGCCACCTTGTCTTCTAAAATCTTTCAGCTAGAGTAAAGTCATTTGGATGATTTCTTCAGGTCCATAGGAAATGAATCCCCCAGTGGTTTCCACAGTCAGAGCCTCCTGCCCTCGCTCACCTGCTGTTCGTGTCTCTGGTTGGTTTCAGGCGGAAAACTCTCTGTTGTCCTGCGAGCTGAAGATATCCCGGAGCTCCTGCTAGAACCACCCATATCTGCGCTTGCCCAGGTGACAGCAGCCCAAAAATATACCTGTAACATCTGAGGAGAGTTTGATTTTAATTTTTTAAAAACAAGAGAAGTTTTGGTCTATTGGTTTGACATTTCTGTAAATGCCTCAGAATGAGTGGTGTATGTTCTCCAGGTTCATAATTACCTTAAAAACAAAATCTTGAAGCTGCAGCTCCTGCTTGGTCAAGTGTCAGCTTCCCTGCAGGAAGAAGCCACTTGCCTCTGCTGGCTCTGATTTTTGAGTCCAAAATAGCTTATAGGATAAGAGATGAGATTATGGCAAAACAGGTACTAGGGTAAAGGGGTTTTTCAAAAATGTGCACCTGGCCAAACACGGTGGCTCACTTCTGTAATCCCAGCACTTTGGGAGGCGGAGATGGGAGGGTTGTTTGAGCCAGGAGTTTGAGACCAGCCTGGGCAACATGGCAAAACTCCTTCTTTGCTAAAAATACAAAAATTAGCAAGGCATGGTGATGCATGCCTGTAGTTCCAGCTGCTCGGGAGGATGAGGTGGGAGGATTGCTTGAGCCCTGGAGGTCAAGGCTGCAGTGAACTGTGATCATGTCACTGCACTCCAGCCTGGGTGACACAGCAAGACCCTGTCTTGAAAAAAAAAAATTAAAAATGTACGTATATCCTCTATCACTTTTTATTTTTTATTTTTATTTATTTATTTTTTTACAGACAGGGTCTTGTGGCCTTGCTCTGTCACCCAGGCCAGAGTGCAGTGGCATGATCACAGCTCACTGCAGCCTCTACTTCCAACTCCTGGCCTTGAGCAATCCTCCCACCTCAGCCTCCCAAAGTGTCAGGATTACAGGCATGAGCCACCACACCCAGCCAACTATCACTTTTTAACTATAATTTATATTTTTTATTTTATTTTTTTTTTGAGATGGAGTTTCATTCTTGTTGCCCAGGCTGGAGTGCAGTGGCACGATCTCGGCTCACTGCAACCTCTACACCCTGGATTCAAGTGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGTGTGCACCACCACGCCCGGCTAATTTTGTATTTTTAGTAGAGATGGGGTTTCACCATGTTGGTCAGGCTTGTCTCAAACTCCTGACCTCAGGTAATCCACCTGCCTCGGCCTCCCAAAGTGCTGGAATTACAGGCTTGAGCCACCACGCCAGCCTTTAACTATAATTTTTAAAAGAAAAATTAAAATAATCTGGGAAGTGAACAGCTTTGGGCCCCCTTGGGCAGCTGGGCAGTCTGGAAGCCATGGGAACCACCGCCGGTCTCCGTAGTTCTGCAGCATCGCCCTGGTGCTGTAGAGATGTTGTGCTGCTCATTGGCGTCTGTGCGTTGATGGGGGCCCCAGGAGTCAGGGTCTGGTCTTCAGATTTTCCATTTCAAACACGATGGAACCAAGACTGATAAGGTTCTGGAGGGACAACTCCCAGGCTGATTGCACGGCCAGAATGGCACCTAAATTAGTTTACAGAGGGCGATAACTGCTATCAGTTATGAACGCAGGAGACCAAAGTGCTGATCAGAAAAATGCTTGTCGAGAAATAGCCTTTGTACTGGTCTATCTGCCCCAGTCCATGCTGTGAAATGCTTCTGGGTCATCTCCCAAAAGCACAGTTCTGACCAGTTACCTGCTGGGAAGCCACACTTCCGAGTGGGACGAATCCCACAGGACCCTCCAGTGCATGAACGTGTGGGCCTCTCTCTCTGTCTCTCTCTGGCCTCACCTCCTATTTTGTCCATGTGAGTGCTATGCTTGCTTCCTACCCTCCTGCATCCCCTACTGGACGTCCTTCCTGTCTTCCTGCCTTCCTCAAATCTCCTTCCAGCAAAATTTTCTCCATTGTTAATGTACACGTAACTGTGCTCTCCAGCTGAGGCCTCGCTCAGTAACACTCCCTCACTCACCCTGATGCACTCTGTCCCCACCCGGACCCTTAGCCATTGCTTTAGACCAGTGGTTCCCAAGCTTGAGTGGGCATCAGAACCACCTAGAGAGCTAGGAGAGAGCACGGAGACCAAGACTTAATGTGGTGGGACCCACCTGGGCCTGCACAGGCATCCGAAGTCCCCAGGTGAGTCTAATCCCCAGCAAGGCATGAGAACCACTGGTGTGGGCTTCTTACAGCACTTCTAAATTTCTGTCTTCTCCTCGTGTGATTGGTGCATTTGCTAACCCTCCCTATTAAAGGTGTAAACCTCTGTGGATATTATTTATCTCTAAATGTGAATCCCTACCCACCATCCCCACACAGTGTGACTTTCAGAGGTAGGAACTCAGTGACGATGTGTTGTGCAGAACTGACAGACATAAGGAAGAGCTGCGAGAGTGGGCAGGGAATGCTGGAGTGATACCTGCCAGTGTGGTCTTTAGTGAACACTGATGACAACTTGACTGAGAAAGAAACAGGGAGCCAGTAGTGAACCACTTATTAACCCATTTATGCCTGAGGCTGCAATTTTTTGAACTTTTGCAATCAGACCTTAGCGAAGACCTTGAGCAGTAGGATATAAGTAACTTCCACAGGCTTAGCATTCCCATAATGGAACACTAGGCATTCACAGAGCACTTGGTTCTGACCAGGTGACAGTAACCATTAGTCCTTGTCTTGAAGAAGCAGATGATCAGTGGGAGGGATAAGACAGGTAATGATCTAATCAGGCCAAGGGTAGACAGGTACGGCCAGCCAGGCTCGTGGAGGGGAGCGCAGCACTGGGGGTGTTGGCATGGTCCTGTGTAGACTCTGTGAATCTGAAGTTCTTTGAGCACATGCAGGCCAGAGACAGGGCAGAGGGGACATCCTCATCAGAAGTGTGGATGGGGGTGTCGACGACCAGGACCATGAGGGTTGTCTCTGCTAAAGAGGAAGTCCCCAGTGCGTACTCCACAGGCCCTAGCTCTTGGCCTGGCTAGGGATAACTGGGGGTGGACTTCAAACGGTGGCCTGAAATTTAAACCAAAACCAAACACCCGGTAGGATTCACAGGGATGACTGTGGAGCCATGCGTGGCTGAAGGCTCCACCACCCCCATCCTGGCCAAATGTGAGGGTCTATGGAGGAAGCACCAGGGGCAGCACTGGCCTCTAACTGCCTCCCATTTCTGGCATACACACTCCTTCACGCATTCATTTAGCCATCAACACTGCGTGCTTTCTGTGTGCTGAGCACAGAGGCGCATGGATGAGCACATAGGTGATGCATTCCCAACAACATACAGGCACAGGCACCCCCCGCCCCCTACACACACACACACACACACACACACTTTCTCATTCCATCTCCATCTTTTCCTTTGGAAAATCCTGCTTTGGAATATTTGGTTTAGTGATAAGGGGAGAAACAAAAAAGCGTACTTCTGAGAGGCTGTTAGTCTTGACAGTGCTTAGGTTCTAATGACCCTATTTAACTACTATTTTCAGGACACGGTCGATTTCCTCTCTCTTGATTTGTCTTATGAATGCCAAAATGAGGCAAGTCTGCGGCAGAAGCTGAGTAAATTGCAGGTAATTTGAGCCCTTTTCCACATTAATGATGAATGCAGCGAAGGTTTCAGAGAGCAGATTAGACATTTAATGCTTTGGCTACTGCATAAACTGAACTTCCATCAACAGCAGAGACCGGCTGAGTTGTGTCTGGGGAGATTTCCATTGCAAGCATCCCTAGAGAACATCGGATGTTAGGCAGTCTTTCCTAGAAGGGATACAAGGGTTAGCTTGTTAAGGTGAATATTTACATAGATCCTCTGCTTGCTTGTGTAGAAATCTGCCGAGAATTGAGATTCTGGGGTTCATTCTGAGTTTTAAAAAGGCAACTCATCAATGGGGGTGATGGTTCCATAACGATAAGCGTCCACTTAACACCACTGAACTATACATTTAATGGTTATGATGGTAATTTTTTTTTTTGAGATGGAGTCTTCCTCGGTTGCCCAGGCTGGGGTGCAGTGGCGTGATCTTGGCTCACTGCAGCCTCCACCTCCCGGGTTCAAGCGATTCTCCTGCCTCAGCCTCTCCAGTAGCTGAGATGACAGGAACACACAAACACGCCTGGCTAATTTTTGTATTTTTGGTAGAGGCAAGGATTCACCATGTTGGTCACGTTGGTCTCAAACTCCTGACCTAAAGTGATCCGCCCACCTTGGCCTCCCAAAGTGTTGGGATTACAGGCGTGAGCCACCACACCTGGCCTGTAAATTTCTTCTTTATGTCTATTTTACCACATTTTTAAAAAAACAAACATTTCACACCCAGGTCTACTAAATCAGATCCCAGCTGTCTGGACCTTCCAGTAAGCTCAGTACTCCCTCCTGTACCCAGTACTGCTTTTCTCAAGTCTAATGATGCGGGCTCATTAGAGAAAGTGTGTTAGGAGCCTCCTATCCAACTACCAGGTTTCTCTCTTAGGCTTGGTCACTGCTGTGTGACTTACTTGCTTCATTCCCTGGAGTCCCAACTATTCTCCTGTGACAAGGTCTATCTGATGGTGCCCACGTTTTCTCCTGCACCTTCCGAGTGCTGGCAGTATTCCTATAACCCTGGAGAGCTGCCTTCCTGTGGGCCTTTCCAAGGATGTCTGGCCACTGGTATGCACCCACATTGGACCCCCTGCCTCTTCAGGTTTGGGCACAAAACCACTCCCCGCTGCTTTCAGCCTGTGCCTCCCATGAGGGAGGCAGAGCAGCCCCCTACACTCACCTGTGGTTTTGAATCCCACCATCTCCGTCACCAAGGAGGATGGTTTACCTGAGGAATTATCCCTTTAAAGGCCAAGTTTGGCTAGGCACAGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGTGGGTGGATCATGAGATCAAGAGATTAAGACCATCCTGGCCAACATGGCAAAAACCCATCTCTACTAAAAATACAAAAATTAGATGGGTGTGGTGGCACGCACCTGTAGTCTCAGCTACTTGGGAGGTGGAGGCAGGAGAATCACTTGAACCTGGGAGGCGGAGGTTGCAGCGAGCCAAGATTTCGCCACTGCACTCCAGCCTGGTGACAGAGCGAGACTCCATCTCAAAAATAAAAATAAAAATAAAAGGCCAAGCTCAGGAATGTCAAACTGAGCTTCAGCTACATGGACCCTCCATTTTACCTGAGGCAGAGTGCTTGAAATGGACAAATTAGCACCAGTCCTATCCCAATGTCTGTGTGTTTCTAGCTGTCTGTGTAGTAGACACTGCTGCTTTTGTTATCTACAAATAATAAGATGATCTAATTTAGGGGTCCCCAGCCCCCAGGCGGAGCACCAGTACTGGTCCATGGCCTGTTAGGAACCTGGCTGCACAGCAGGAGGTGAGTGGCAGGCGAGCAAGCATTACCACCTGAGTTCCACCTCCTGTCAGATCAGCGACCATATTAGATAGGAGCACAAACCCTATTGTGAACTGCGCATGCGAGGGATCCAGGTTGCGTGCTCCTTATGAGAATCAAATGCCTGATAATCTGAGGTGGAACAATTTCATCCCCAAATCATCCCCATCATCCCCCCTACCTCCCCCAAGTCTGTGGAAAAATTGTCTTCCACGAAACCGGTCCCTGGTGCCAAGAATGTTGGGGACCACTGATCTAATCAAAGGTGACAGATGCAGCCACAAAGACATCTGAGTAACAGCTATTTTAGTGCTTGCTTATCCCAGACACCTGGGAGGCACTTCATTCTCACATCATCTCAACCTCTCAAAACACACCCTAAGGTTTAAACAATTTAACCAAGGTGGTGAGCTCCAGGGTTGGGATTTGAACTAGGGCTTTCTCATTCTAAGACACCACACTGTCTTTTCAAAAGAAAAATAAGTCTCACATCTGTAATCCCAGCACTTTGGGAGGTTGAGGCAGGCGGATCACTTGAGGCCAGGAGTTCAAGACCAGCCTGCCCAACATGGCGAAACCCCGTCTCTACTAAAAATACAAAAATTAACCAAGTATAGTGGCACGAACCTGTAATCCCAGCTCCTTGGGAGGCTGAGGCACAACAATCGCTTGAACCCGGGAGGTGGAGGTTGCAGTGAGCCGAGATGGCGCCACTGCACTCCAGCCCAGGGGACAGAGAGAGACTTGTCTCAAAAAAAAATAAAGAAAAAGAAAAAGAAGTCCAAGGGTTTGGAACATGATAATGAGGGTGGTATTTTTTAAATCACATATGATCAGTTTCAGTTGTTTTTGTGAATGCCTCACCTGAACCTAAACTTGCTTGCTGCCCAGGGCCTGTGCTTGCGGGCTGGCTGACAAGGTTTCTGGGCCTTACGTAACACTCTAGGGTCTTCGAGGTCTGCTTGTGCCTGTGATGACTAAAGCTGCCTTTTGTCCTCCTCATGGAATTAAACCAGAGCTTGTTGCTAGGCTAATGGGTTCAGACAAAGCTGCCATTTTAAAACCATCTCTGTTGTGCGGAGGTGGCGTGCCTCTGACAGGCAGCAGAGGCACTGCATGGCTGGTACTTGAGGATGTGATTTTCAGTCTCTTCCCTTTTTCCATCACAACATTTTTTTTTTAAGATGGAGTCTCGCTCTGTCACCCAGGCTGGAGTGCCCCGTGGCATGATCTCGGCTCACTGCAACCTGCACCTCTGGGGTTCAAGCGATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGACTACAGGTGCCCGCCACCATGCCTAGCTTTTTTTTTTTTTTTTTGTATTTGTAGCAGATACAGTGTTTCACCATGTTGGCTAGGCTGGTCTCGAACTCCTGGCCTCAAGTGATCCACCACCCCCCTCGACCTCCTAAAGTGCTGAGATTACAGGCATGAGCCACTGCACCTGGCTACCACATTTTCAAGACACTTCTACCAGTGTCTGGGTAAAGCACTGAAACTTCAGTCTGTAGCTGGCCAGCCAAATAGTAAAACTCTCCCTGGACAAACTTAGCAGCCATTTATAAATGTTCTGCAATGGTTATCTATTCCTTTTACTTCCTTGGGTAGGGTTGGCCCCTCAGAGGCAGCCTCAAAGATTCGGAAATGACAGCTGACAACCCCTGAGCTGGAGAGCACACTGTTCCTTCCTGGGATTGTGGTCTCATGCCTGGTAGCATTTCCCATGCACAGTTACCACACCCAGGACAGGGAGAACAGCTGAGCCTGCCCTGAGAACAGGGGCTGAGTTGTGTCCACAGGAGCCTGAATCGTGCTGCCCACCTTCCCAGTGTCCCCTCCCTCCCTGTCCTGGAACCCCACCCTCCCCTTGAAGGGGCCTCTTCCTGTGGGCTGCGAGGGAAGAGGGCGAAAGCAGGGCAGGGAGGAGCAGGCACCTGCCCCCATTCCCACCCTCTCCCCGGAGCACCTGCACAGCCCTCTCTGAGCCTCAGGGGCCCTCAGCGGCCACTCCCCAACGGCACACAGCTGTCTGAGGGCCTGAGAACAGCCTCAAGCCCAGAAATGAAGAGCTGCGGCACACTCCAGGGTTCTCGGTTCTGCAGGAGGCACACGAGGAGGTCCCCCATCCACTTATCAGAAAAAGCTGTGCTCCCAGGGAGTTCTCCGCTTGGCAGAAGACAGGCTGGGGGCCTCCGAGGCTCCTCTAGCTCTCCAGCCCTGGGACATCACCGGCTGCCTCATGGCATATCCGTCTTTCAGGAAGGAGAAAACCCTTGCTTCCCTAAGCACGCCACCCTGGGTCATTTGTCCCCACTGCATTGACCCAGCACAGGGAGGGATGAGGACTGTGTGGAGTCACAGCTCCACTAACTGCGCAGTCATGGGCAACCCGCTCGGCCTCTCGAGCTTCTGACTCCCCATCTGTAAAGAAAGTCACAGGTGGCTATGAGAAAGTAAAGACTATGCCTGGCTCCTTTTAAAGTGGTTACTATTGGTATCTTTTGGTAGCTTTTAGTTGCTCAGAGGAGGAAAAAGTTACTGTGAGTAAAATGACCAAACTCCAAGCCCCGCAAAAAAGGTAATTTAAAGTGAAGTCAAACAGCATGTCAGCCCTGAGACGTTTTCCTTGATACTGAAATGGCTAGAAATAAGAAGAGGAAATTACATTTGCTTCCATTTTAATGATAACAGCTAATATAACTTTTCTTTTTCTTTTTTTTTTTTTTTTTGAGATGGAGCCTGGCTCTTGTTGCCCAGACTGAAGTGCAGCGGTTCACTGCAACCTCTGCCTCTTGAGTTCGAGCGATTCTCCTGTCTCAGCCTCCCAAGTAGCTGATATTACAGGCGTGCACCACCAAGCCCAGCTAATTTTTGTATTTTTGATAAAGACAGGGTTTCACCATGTTGATCAGGCTGGTCTCAAACTCCTGACCTCAGGTGATCCGCCCGCCTCAACTTCCCAAAGTGCTGGGATTATAGGCGTGAGCCACCACACCCGCTGGGCTGCTGTAACTTTTTGTTTTGGAAAATTTCTAACATCCAAAAGTAAAAAGAATGATGAACCCCTACGAAGCCATCTCCAGCTGCAAGAGCTATTATTGGCTCATAGCTAATCTTCTTTCACCTAACCTATAACTTCTATCTCCCAAACTAGATTTTTAATTAAAAAAAATTTTTTTTAGAGACAGGATCTTGCTCTGTCACCCAGGCTGGAGTGCAGTGGTGCAATCATAGCTCACTGCAGCCTTAAACTCCTGGGCTCAAGCGATCCTCCTGGCTCAGCCTCCCAAGTAGCTGGAACTACAGGCATGCACCACCACACCCAGCTAACCAAACTGGATTATTTTGAAGCAAATCCCAGACATCCATAGATGGTGGCCTCTTAACTAAGCTTCTTAAGTAGCACAGGCTCTGGATGTGAGCAGTGGCCTTGTTCCTGATCCTAACACCGTGCCTGGCTCTAAGAGGTGCCCTGTAAGCCTGTGATGAAAGAAGAGGTGAAGGTGGGGGTTGGTGGGGAGGGTAGACTATGTTATTGAAAATTGTAGATCTTTCTCCTTCAGAGTCTAGAGTTGACTCCTTTAATTAACTGAGATACTTAGAGGTCATTTCCAAAGCCAGAGTAAGTGCCACAGCTTCTCATCTTGAAACTCTGTCCCTGTTATCAAGATGCTTTCCAGGAGGCTTCAGTTCACTTGCTGGTCCTTCTCTTTTTCAGACCATTGAGCCAAAAGTGAAAGTTTTCATCTTCAACCTAAAACTCCCAGACTGCCCCTCCACCATGAAGAACCCAGCCAGTCTGCTCTTCAGCCTTTTTGAAGGTGAGTGGGCCATTGCTGAAGAGGTTTTGAGTGACATCTGGTGCACTGGTGTGCTAGGTAGCAGTCTTGAAAGTCCGGGGTTCCAATGGGAGGAGGAGCATGGGAGGAAGACTAGTATAAAAGCCCACCCCAGACTGGGTTAGTGTACTATGCCTTCCTACTGATGGTCCCTTGCAACCATCCCCCATTGACCCAAGAAAGAGGAGAGGTTCGACACAAGGATGCCTCTTATGGACATAAAGATAGTTAGGCCTCAGCTGTGAGTTGTAAGCATGGAAAATCATCTTTGCCTCCTTTTATTACCTCTCCCTCCAAGGTCTCTCATCTCTGCTCCTCTTGGCAGGGCTTTCCTCTGCCCGTCTCTCTCCAGCCAGCCCTCTCCACTTACCCTTGTTCCATGTCCCCATTATCTCAGTTTGCACATGGCCCTGGTGGGTACCTGACCAAGATGCTAAGAACTTTCAAGTGTAGGACTCACAGCTCAAGGGCCATCTTCCTAGTGTCTCTTAGTTCAAATTCTTGAAAGTGAATCTGATTGTTGGCCATGGATCAGGAGGCTGCTTCTGGTCCAATCAAGTCAGGGGAAAGGTGGGGTCTATGATAGCATTGAGGCTGGCAGTCACTGGAGACTCTGCAAGAAGGGAGGAAGGGGCAGAGAGGAATGTGCCTGAGGGCCCTGATATTCACTCTTGTCTGTCTGTCACTCTTGTCCATAGGCCAGAGTATGGGAGAAGTCAGTAGTGTTCTTTCTACCACTGTGTCTTAGTCTGTCTTGTGTTGCTATAATAGAATACTCTAGACTGCGTAATTTATAGAGAAAAAAGGTTTATTTAGCTCATGGTTCTGGGGACTGGGAAGTTCAAGATCAGGCAGCCACATCTGGTCAGCTTCTGGTGAGGGCCTCATGCTGCATCCTAACCTGGCAGAGAAGTGGAAGGGGAAGGAAGTGCCTGCAAACAGATAAAACACCAGAAGCAGCCTCACTTTATAACAACCACTGTATGGTAACTAATCCAGTCCCACTAGAATGGGAACTCACTCACTCCCTGGAGACTGCACTAATCCCTGAAGGCGGATCCCTCATGACCCAAATGCCTCTTAAAAGTCCATAACCTCTCAATAATGTTACACTGGCAATTAAATTTCAACATGAGTTTTTGTAGGGACAAACCACATCCAAACCATAGCACACCTGGACCCCATCCCAGCAATGGGTTGGCTCCACAGGGCCCCTGAAGTTTATCCATAGATACCATTTAATAATGCTTATCTGAACTCACCTTTGATGGCATAACAATTTCTGTTCTTAGATCCCACCTCTCTCTAAGTGGTTAAAATTGTACCTTTTCTGTTTATGCGTATTTTACCACAATTTTAAAAATATTCCACCCCTTTCTGATCTTTGAGTTCTTCTAAATCAATTGTTCCCAAAGTTGTGGTCCCCAGACCAGCAGCATCAACATTAGCATCACCTGGGAGCTCATTAGAAATGCATATTCTTGAGCCCTACTTGAGACCTACTGCATCAGACACTCTTGGGGTGGGACCAGCAACCTGTGTTTTAACAAGCTCTACAGATGATTTTGATTCACCTTGAAGTTTGAGAACCACTTTTAAGTAATAAGTAAGTAGAATTCACTTCTAAGCTTTTAAAGTCGTTAGCCCATGGTATTTTCCAAAGGGGCTTGGAACCACTGCTTGGAGTTTGGGTGTGTCGTATGAGCGAGGAAAACTGTTGATTTCAGACTTGCATGTTCTTAGTTGGCATTTACATAGAGTTAATTCAATTTCTTCTCTTACAGCCATAAATAAAGACCAAGTGCTCACCATTGGGTTTGATATTAATGAGTTTCTGAGTTGTTCATCAAGTTCCAAGAAAAGGTAATGTCAGGTTGCCTATTGAACTATCAATAACTGATATTTGTAGAATACTCTGTTTCTTTTACATGTATAATCTTATAGGACTGTTGTCATCAACTCTTGGGCCACCAAGTTGTTTATTGCCATTCAATTGATGAGAAACAAAGACTCAGAAAGAAAGTAGGGAGTCTCTTACTGAGTGGGACTCACGAGGTTCTGCTTTCTCATCTGTCAGTTCGCCCAACTGTGCGCAACTAGCATGATGGCTCACACCTGTAATCCCAGCACTCTGGTAGGCAAAGGTGGGAAGATTGCTTGAGTCCAGGAGTTCAAGACTGGGCTGGGCAACATGGTGAAACTCTGTCTCTACAAAAAATTACAAAAATACAAAAATTAGCCAGGCATAGTGGTGCATGCCTATGGTCCCAGCTACTTGGGAGACTGAGGTCAGAGGATCGCTTGAGCCCAGGAGGCAGAAGGTGCTGTGAGCCGAGATTGTGCCACTGCACCAGCTAGGATTTGAACTCCTGCCTTCTAGTTTAAGATCTTGCTGGAGACAGTGTGAGCTTGTGGAAGAGTGGCCAGGTTAAAGATAGATCTGACCATCCCACTGAAGTTACATGATGAGTAAGAGAAATGGGTTCTCTGTGCCCATTTCCTCAATATGAAGGGTGAAACATCCTCCCTGGAAGGAGACAGCATAGACAGGAAAGCCAGCTGTTGCTGCAAGCCAGATGCCTAACTTGACAACATTCTAGGGACAGAGGTTTGAATCTATGGTGGGGTTGTAAATAATATCTTACTGAGTGACTCAAGTGCCAAAGTTTGCAAATGATATGCTTTAAAAAATTAGAGCAGATTGCCGGGCACGGTGGCTCACGCCTGTAATCCCAGAACTTTGGGAGGCTGAGGCGGGTGGATCACGAGGTCAGGAGATCGAGACCATCCTGGCTAACACAGTGAAACCCCGTCTCTACTAAAAATACAAAAAAATTAGCCGGGCATGGTGGCGGGCGCCTGTATTCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATGGTGTGAACCCTGGAGGCAGAGCTTGCAGTGAGCTGAGATCGCGCCATTGCACTCCAGCCTGGGTGATGGAGCGAGACTCCATCTCCAAAAAAAAAAAAAATTAGAGCAGGTTTTGAGGTTGTGCCCATAATTACATTGCTTTCATGCACGCATATCCCAATACGCTCTTGAGTGAATGAGAAAAGAATGGCATGGTAGCCAGCACTGGGAATTGCCCAGAACCTCAGCGCTCCTGGAGGGATGTTACATGCATGAAGTTCATGCACCATCCCTTAGGGTGGGACAAAAGCTGGAGACAATAATTGACTTCCAGACCTTCAACTAGAGTTCTCTTTGTCACTCACCTCTGATATTTCCAGTGCAGCCCTGTTCAGGGTCATTCAGTGCCAACACCTCTTTGTACCTTTCCTCTTCCAGTACTTGACTGTCCTGAGCCATATCCTCCTTCTATTGTTCCATCCTCTCAATCCACTTCAAATCAGCAGTTCAGATCAACAGACCCGCAGCAGCAGCTGTACCTCAGGCTTTGTGTGGCAGTGTAGACTTGCAATAGACAGACATGGGTTACCACTGCTTCCTGACCACACTTGGAGTCAGCCAAGAAGGACTGGCTGTGGCTTCACAAAAGAAAACTCAAGAAGCCGTAATAAAATGGGTTCCCTGAAAAGCCAGTGATTACAAGTGACACCAGCTCGGAGATTCTCTCAGCGCTTGGTGGTGCTTTGCCTGAAGGGGAGGGCCAACACAGGCTTACATTAAACGGACAGCCAGGGGCCCCTACTCAGGAGGATGTGGTTGGAGAACCCGCTGAATGCAGGGGCGGTGGGTGGGGAGTGATTCAGAAATGACTAACGTAGTTTTCTCCTTCAGAATTCTAGACCTTATAGGGGAGACAGAGCGGCCCCTGAACAACTCTCACATGAGAATGTGGCAGGAGACACACAGCACTGCACCCTAAAAAAGGGAGAACTTAGGAGCCCTGGGTGTGGAGCCTGAGGTGCCGTCATGTGCTAAATCCCTCTCCATGGCCGGCAAACCCAGGGTTCCCAGCTCCAAGCCTGTACAGCAAGTTCTGTTTCATCCCAGGGCAGTTTATTTGTTTCCTTAAAACTCTTCATGCAATCCAGGAGGACAGTCAGGGCCAGGAGGGCTGGGTAAGAGCGAGGTTCGGAGCAGCGGACGCTGTGTTTATGGGCACCGTGGATACGCAGAGGCTGAGCACCGAGGGGACGCGACACATCCCCAGCTGCGATCCACAGTCGCATATGTTTTGGGATCATCTACGAAGGAAAGGATCATTTCCCTTTCCCCCTAATCTAGCAAATGTCAGGGAGTTGATGCTGAGTGAAACAGAAGGCTCTTAGGTTCGGAAGACGCCTCCTGCTGGGTGACCCGGCCCCAGGCTTCGCTTTTTGAGAGGAAGATCCCTGTGCAGTGTTCCTGTCACTTCTCCCAGTTCTGCGCTGTGGGCCAGCCTGGTATAGGTGCCATCGGCTCTCCACACGATGGCTGTGTGACAGGGAGATGCTCTTGTACTCCACGTCCTTTTCCCCAGGCATGTGCCACACTCCTCAAGGCCGACCTCTGCTGGCTGTAATTTGCAGCATGCATGCTTTCCTGCCTGCATGACATGACTCAATTTAGAAGTTGGCAATCACAATTGAAAAAAAAGAAAGAAAAAAGAGGAAAGGAGAGTGGGAGGGAGGAAAATGGACTAGAATAAAATAGAAAAATCAGACCACATTTTGTATAAATGCAGCATGCACTTTTATGTGATGTTTTATTTCATATGCATATACATATGTACATGCATATACATGCACATAAACATGTATGTGTATGAGGTTAAAAGCACAGAATCTGCTCCACCACCTGCAAGCTGTGTGCAAGGAGAAGTTACTCATAATCACCAGGAATCCAGGCTCCTTTGTTGTTGCTTTGTCACCTTTAACAAGGTTTCTACCTCATGGTTCAGGATGGATGCTTGAGCTCCTGCCATTATGTCAGAAATCCAAGCAGCATGAAGAAAGGAAGCAGTAGCCAGGACTGATATTCAACTGGTCCACATCAGTGTACTTCTGTACCAGCTGATAAATAGCTGCTGGCTCCCTGAAATCAGGAAGTTTTCCATTTCTTTTTCTTTTTTTAGACGGAGTTTCACTCTTGTTGCCCAGGCTGGAGTGCAATGGCGTGATCTCAGCTCACCACAACCTCTGCCTCCCGGGTTCAAGTGATTCTCCTGCTTCAGCCTCCTGAGTAGCTGGGATTACAGGCCCCCACCACCACGCCTGGCTAATTTTGTATTTTTAGTAGAGACGGGGTTTCCCCATGTTGGTCAGTCTGGTCTCGAACTTCTGACCTCGTGATCCACCTGCCTCGGCCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCACACCCAGCCCCCGTTTCTTTTAGTCTCAGTTTCCTCCTGTGTAGGATGAGGATAAATCATTGTAGTACCCATCTCATAAATTTGTTGGGAAAATTAAATGAGATAATACACATCCATTCTTATTACAGTGGCCAGCGCAGAATCAAGGCTCAATAAATTGGAGTCATTGTTATTATTAGCTATGTTTTACAAATAATGAATTATAAGAAGAATAAAGTATGGTACTAACACTGAAGACAATATTCAAGGGCAATCTAATACAGAGAACAGTAGCATTGTTCCTGTTTTAACCATGAAAGAGCAAGAATCACTCACCTCAGAAGGGGTAATAATAATAATTTTCCCATCTGCCAGTTACTTCCTGCCCATGGCTTTGCAGATATGACTTTTGACTCAGTGTTTCTGGAATCTTTTCTGATTAAACACAGTAGTCATCCGTAGGGGGCATTTGTAACTGCCAGCGTCATTCATTATTATGTCCCTTTTTGTCCCTTCAATCCTGCAGCATGTCTTGTTCTCTGACTGGCAGCCTGGCCCTTCAGCCTGACCAGCAGCAGGACCACGAGACCACGGACTCCTCTCCTGCCTCTGCCTATCAGAGAATCTGGGAAGCATTTGCCAATCAGTCCAGGGCGGAAAGGGATGCCTTCCTGCAGGATACTTTCCCTGAAGGCTTCCTCTGGGGTGCCTCCACAGGAGCCTTTAACGTGGAAGGAGGCTGGGCCGAGGGTGGGAGAGGGGTGAGCATCTGGGATCCACGCAGGCCCCTGAACACCACTGAGGGCCAAGCGACGCTGGAGGTGGCCAGCGACAGTTACCACAAGGTAGCCTCTGACGTCGCCCTGCTTTGCGGCCTCCGGGCTCAGGTGTACAAGTTCTCCATCTCCTGGTCCCGGATCTTCCCCATGGGGCACGGGAGCAGCCCCAGCCTCCCAGGCGTTGCCTACTACAACAAGCTGATTGACAGGCTACAGGATGCGGGCATCGAGCCCATGGCCACGCTGTTCCACTGGGACCTGCCTCAGGCCCTGCAGGATCATGGTGGATGGCAGAATGAGAGCGTGGTGGATGCCTTCCTGGACTATGCGGCCTTCTGCTTCTCCACATTTGGGGACCGTGTGAAGCTGTGGGTGACCTTCCATGAGCCGTGGGTGATGAGCTACGCAGGCTATGGCACCGGCCAGCACCCTCCCGGCATCTCTGACCCAGGAGTGGCCTCTTTTAAGGTACTTCCCAACCCTGCAGCTCCTACTAATTGGAGGAGAAAGGACATTGGCTGGAAGAAAGTCATTTTCTCTGTTTTCTTCCATCAGCAAGCCTTTACTTTTTGTTCTTTCTTTAAAGGAAATCGGGGAGGGAAGGAACCATAAATTGTTAATTAAATTGTTAATCACCTGGCCAGGTGCAGTGGCTCATGCCAGTAATCTCTTCACTGTGGGAGGCCAAGGCAGGCAGATTTCTTGAGTCCAGGAGTTCGAGACCAGACTGGGCAACATGGCGAAATCTTGTCTCTGCAAAAAAAATACAAAAATTAGCTGGGCATAGTGTTGCACGCCTGTGATCCCACCTACCCAGGAGACTGAGGTGGGAGGATCGCTTGAGCCTGGTATGCAGAGGCTGCAGTGAGCCAAGATTTTGCCACTGCACTCCAGCCTGGTCAACAGAGCAAGACCCTGTCTCAAAAAAAAAAAAAAATTGTTAATCACCTTATTTTGTCCAGACCCTCTACTGGTACTTTTAAAATATAATAGTTATTACATACTCAGAATAGCACTGATGGATGAGTGAGGCTCAGACAGGATAAACAATGTGGTCATGATGACATTGCTAGTGAGTAGCCAGGTGGAAGTCAGGATTTGAGTCCGAGTCTTCTTGACTCCAAATCAAGTTCCTCATGTCAGTGACAAGTGACAGAAAGCTCAACCCAAAGTGACTTAAGCAAGGAGAGAATTTACACATTCAGGAGGTTGAAAATTCTTCATTTAGCCTGATCAGGGTGGAATTTGATCAGAGCACAAGCGATGCCACCAGGACCCAGTTCCTCTCCTGTTCCCCTCGTTGGCTTTATTCTTGGGCTACTTTGCAGCCAGATGGCAGCAGCCCTGTTTCTGTTATTTCATGACCAGTAGGGAAAAAAGCTCGGCTCTTTTCTGTCCTTCCCAACAGAAGTGTCCCTGTGTCTCATGGTCTCTCAGTGGGTCGCATACCCACCCTGAACTCATTGCTGGGTCCTGGCCAGGAGGCTGTGACAACTGACTGGTGTGGGCCAAGGACCCATGCTTTACTCCTCAATACGCTCACTGAGAGCTGAGGAGGCAGAGAGGGAAGAGGCAAGGAGGCATTGGCTTTCCAGACGCCAGTCAGGGCATCGTTTCAACAAGAAGGGGATGTATACACTTGGCACAAAATTTTAAATGGCTATAGGTCTTTTCCATTATTTCCCTTTTTCTTCGGAATCTCCCAGAAAACCCACCAGCTTTCTTTGGCAGCGTTCTTCAGGATGGACATGTTAGTCTGCCTTTTCCCTGTCACTGTTTGGGTTCCATTCTTAAAGATGAGTATTTCCGGCCGGGCATGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCAGGCAGATCACCTGGGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAAATCCTGTCTCTACTAAAATACAAAAATTAGCCAGATATGTTGGTGGGTGCCTGTAATCCCATCTACTTGGGAGGCTGAGGCAAGAGAATCACTTCATCCTGGGAGGCGGAGGTTGTAGTGAGCCGAGATTATGCCACTGCACTCCAGCCTGGGCAGCCTGGGTGACAAAGCGAGACTTTCATCTCAAAAATAATAATAATAATAATAATACTATTTCCTGTGTCCCGTGCTAGGTGTTAAAGAGGGGTCACAGCCATGCCCTGGACATGTTATAACCTTTTGGGGAAGATGACACTAACACACGTAGCAAAATTACAGAACTCTTCAAAAAGACCATTACTAGGGTGAGCTGGAACATCATGAATAAATAACTCAATGAAATGTCAAACATTGTAGCACGTAAGTCAGAGTTTCCATTCAGCTCTTGAGCAGCAAGTGACTGCACATATGGAGCCATTCCACCACCCACCTTCTGAAAATGGAAGGTTTGAAATCTTCTGTTTCACTCCTTGTTGTAAAACTTATCTAAACCTTTTCTTCTTAGGCTTTTCTTCCCCTTTGAACTGTGGCTCTTGAGATATCTTTACAGAGACTTTGATGAGAAAGTCCCCTTCACTCTTCTCCTTCAATTCTTCCCTTTATTTATTAGTTCAATGTTAGAATGCCATTGTCTTAGTGAGCTTGAGCTGCAATAACAAAACACCATAGACTGGTGGTTTAAACAACAGATATTTATTTCCCAGCATTCTGGAGCTGGGGAAGTCCAAGATCAAGGGACCAGCATGTTTGGGTTCTGGTGAGGGCTCTCTTCCTGGCCTGCAGGCAGCCACCTTCTTGCTGTCCTCATATAGGGGTGGTTGGAAGAACTCACTGGTATCTTTTCTTATAAGGACACTAATACCACCACCATGAGAGTCCCACCCTCACGACCTCCTCTAAATCTAATCACCCCTCAAAGGCCCTATCTCCAAATACCATCACATTGGGGATTAGGGCTTCAACATATGAATTTTGATGGGGGAACTAATTCAGTTCATAGCAACTATTAAGGAAAAAATTTAAAGAAAAAAATGTCTTGGGCCAGGCGCGGTGGCTCACACCTGTAATCTCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACGAGATCAGGGGTTCGAGACCAGCCTGGCCAACATAGTGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCCAGGCATGGTGGCGGGCACCTGTAGTCCCAGCTACTCTGGAGGATGAGGCAGGAAAATCGCTTGAACCTAGGAGGTAGAGGTTGTGGTGAGCCGAGATTGCGCCACTGCACTCCAGCCTGGTGACAGAGCGAGACTCCATCTCAAAAAAAAAAAAAAGGTATTTTATTATTTTAGAGTATTTCCTTCCAGTCTTGGCTTCTTTGCATTTATGATGTTGTCACAGGGCACCTCAGATTGTTGCTCACTGTTTACTTGATATCATGTCATTAGCACTTTTCCCTATTATTAAGCAGTTGTGATGACTGTTTTTGATGGTTGTCTAATATTCCACTGAGCTCCCTTCCTGACACTGGCTAATTTAGAAGAAAGACATCCTTGCTTTTGGGAAAGGAGCACAGTTTGCCATTCGAGTTTAGAACTCAACTAAAGACGAATATTTGGGATGTGAGAAGCAGCCAGGCGTTTTAGTGATTATGGCATATTTTGACATGAAATTCTTCAGAGGTAAGTGTCATCCTCTTCAGATGATAAAAAGGAAAAAGGAAAGCAAGCTAAGATTTAGCAAGCTCACTCCTGCTCTCTGAGTTTCTGCTCAATGCTTTATATACACGTATTTAATCATCCCAGATGGATCCATAATTACCCACTTCTTGCCAACAAGTAAGCTGAAGTTGAGAGAATAAATTGTCCAAGTTAACAAGTTAGTAACAGAGTCAAGATTTAAATCTAGGCTTGTTATATTTCAGACACCCTCCTTTTGCCCCTACTTCCCACAAAAAGCTGTTGCTGTTTGTAAGAGCATGAGTTAAGTCTACAAAGAAACTCCTAGTTTGGTTTTTCTTCCGAGATTCCATTAAATAGAAAGAAGAACAAATAAGCAAGAAAGAGCCCTCCTCTCCCTTGTGTGCTAATTAAAATAATCCTCCCCATGGACTAATTTCTCAAAAAGCACTTATTAAGGACAGAGGCAGAATAGCAGTGTGGGTAACAGTAACAGGCTTGGAATCAGACTGCCAGGGGCTTGTTCTCACTTTACTCCTGACTCTTGGCCTTGTGAAGTTACTACACAGCTCTGGATCTTGTTTTCACCATCTCTATAGTGGTGATAATAATTGTACCTGCCATATGAGGTTGTGGTGAGGATTACATGAGAGAATTCATTAAGTAGTAGGTGCCCAATATATGTTAGTTGTTATTGTTAATGTGGTTGTTGATGCTGTTATTTTGTTGACAATTTTGAAGAATTGCTGTTGACTTGTTGACTTTGTTGATGATATTGGTGTTGACTTGTTGACATTGTTGACTTGATATTGTTGACCTTTTCACATTGGTGTTCACTTGTTGATGCCGTTCATGTTGGTATTGATGTTGTTGACATCAACATCGTTGATATTATTGACTTGTTGACATTGGTTTGGATTTCTTGACATTGTTGACTTGTTGACATTGTTAGTATCAGTCTTAACTTGTTGTTTATGTTGTTAACTTGTTAACTTTATTATTGACTTGTTAACATCATTGATGTTAGTGTCAACATTGTTATTTGTTGATGACTTGATGTTAACATTAGTGCTGATGTTGTCAATATTGACAATGTCAATGTTGTTGTTGTCTTTATTGACTTGTTGACATTGTTGTTGACTTGTTATTGGTTCATTAGTCCTGTTGTTGTCATTGTTATTATTACTAATCACTGTATTTCCTTCTTACTTTTTCACAAGGTGGCTCACTTGGTCCTCAAGGCTCATGCCAGAACTTGGCACCACTACAACAGCCATCATCGCCCACAGCAGCAGGGGCACGTGGGCATTGTGCTGAACTCAGACTGGGCAGAACCCCTGTCTCCAGAGAGGCCTGAGGACCTGAGAGCCTCTGAGCGCTTCTTGCACTTCATGCTGGGCTGGTTTGCACACCCCGTCTTTGTGGATGGAGACTACCCAGCCACCCTGAGGACCCAGATCCAACAGATGAACAGACAGTGCTCCCATCCTGTGGCTCAACTCCCCGAGTTCACAGAGGCAGAGAAGCAGCTCCTGAAAGGCTCTGCTGATTTTCTGGGTCTGTCGCATTACACCTCCCGCCTCATCAGCAACGCCCCACAAAACACCTGCATCCCTAGCTATGATACCATTGGAGGCTTCTCCCAACACGTGAACCATGTGTGGCCCCAGACCTCATCCTCTTGGATTCGTGTGGTGCCCTGGGGGATAAGGAGGCTGTTGCAGTTTGTATCCCTGGAATACACAAGAGGAAAAGTTCCAATATACCTTGCCGGGAATGGCATGCCCATAGGGGAAAGTGAAAATCTCTTTGATGATTCCTTAAGAGTAGACTACTTCAATCAATATATCAATGAGGTGCTCAAGGGTAAGAACAATGGATGTGCCAGTGATTGGAAGGTGGGTGGTACTTCTCCAAGTCTTCAAAGTTTCGTTTAATAAGACAAAGAAAGTCTCCTAGAGAAATGGAGCCAAAGAAAGCATAATAGTGGGGAGTCCTTCACGTCAGCTCCCTAACTCTCTTTCCCAAGGTTCATTCATGTCATTCACGTCTTTGCTCCTGTGGTTTTTTTGTTTTATTTTGTTTTGTTTTTGAGACAGGGTCTTGCTGTGTTGCCCGCTGGAGAGCTGGAGTGCGGTGGCATAAACATCGCTCACTGCAGCCTCAAAGTCCTAAGCTCAAGCCAACCTCCCACCTCAGCCTCTTGAGTAGCTGGGACCACAGTGCAAACCACTATGCGTGGCTTATTTTTTTTTTTTTTAGACATGGGGTCTCTTCACATTGCCCAGGCTGGTCTCAAACTCCTGGGCTCAAACAGTCTTCTCACTTCAATCCTCCCAAAGTGTTGGGACTACAGGCATGAGCCACCATGCCTGACCCAGTGTCCTTATTGCCTAGAATACCTCCCTCTACATTCTGCCCATCTGAGTCCTATGCAGTCATCAAGCCTCATTATCTTTTTTTTTTTTTTTTTTTAGAATCAGGGTCTTGCTTTGTCACATAGACTGGAATAGAGTGGCATGTTCATAGCTCACTTCAGCCTCAAACTCCTAGGCTCTAGTGATCCTCCGGACTCCAACCCATTAACTCTTCCCTCATTGCTCCAGCCACACTGGTCTTCATTTCCTTTAATTGGCTATAAAATCTACATTCTATGTCATTGATTTTGATCCAATTGATCCACATCTACGGTCTCATTCTTACCTGACATACTATTTCATATGTCTTTTTATTCCAATTGGAAAGGTATGGCAAATAGATTTCATCTCACAGGTCAGTCCTCAGTTAACCCAGGATGTTCTTATCTTGAGCTCATTAACTTAATTACATCTGCAAAGATCCTTTTTCCAAATAAGGTTATATTCACTGGCACCACGGGTTAGAATTTAGACATATCTTTCGGGGGTCACCATTCAATGCACTGTATCAGGGAATATATGTTTTAGGCTTTATTTATTTATTTATTTTAATTTTAATTTTTTTAGACAGGGTCTTGCTCTGAGGAGTACACTGACCTGATCTGAGCTCACTGCAGCCTTGACCTCCTGGGCTCAAGCGATTCTCCCACCTCAGCCTCCCAAGTGGCTGGGACTACAGGTGCACACCACCACACCAGGCTAATTTTCTTTTCTTTTTTCTTTTTTTTTTTTTGAGACGGAGTCTGGCTCTGTCGCCGAGGCTGGAGTGCAGTGGCGCGATCTCAGCTCACTGCAAGCTCCGCCTCCTGGGTTCACACCATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGACTACAGGCTCCCGTCACCACGCCCGGCTAATTTTTTTGTATTTTTAGTAGAGACAGGTTTTCACCATGTTAGCCAGGATGATCTCAATCTCCTGACCTCATGATCCGCCTGCCTCAGCCTCCCAAAGTACTGGGATTACAGGCGTGAGCCACCACGCCTGGCCACATTTTTTTTTTTTTCAATAGAGACAGAGTCTGGCCATGTTGCCCAGGCTGCTTTATTTCTTTTTTAAAATATGAAATATGTTGTCACTTAATGTCAATATTACTAAACATAAACAAGGAAAAGCAAAAGCATATTGTTAAAAGATTGATTTATGTATTTTATATAAAACCAAGACACCCTGGTGGTAACACATGGGACACATCCCCTCCACTATTTATTTGAAGATGGGGATGCTCCTTTGGTTTGAGATTTATGGCCACAAAAGTGGCCTGATGTTTTGTGAGGATCTTGTGGATTTTCCTTTTATCCATTCTGAGGAGGAAAAAGGATGAACATTTGGGGGGTAACCCAAGTCACCCCCAAAACTTTCATGTCAAAAGGAACACTTTTTAAAAGCAACATTAGTAACAGAGACTATACTTCTAATTTTTCAATAAAAACTATTTTATTGAAAAACAGTCCTCGCTGGTCCGAGGTAATAACTTATCTCAGTTGACTGTTCACAACCAGTTACAGATCAAACAGCTTCTAATACCCTTCTCCCCCTTCTCACTACTGCACTGACTAGTCTTTAAAAGATAAATAAATAAAATAATGGTGCACTAGCTCACTCCTGTAGTCAGTCCCAGCTACTTGGGAGGATCACTTGAGTCCAGGAGTTTGAGACCAGCCTGGGCAATACAGTGAGACCCCATCTCAATTAAATAAATCCATAAATAAATAAATCTAATTTTTATTTTTCTTCACAGCTATCAAGGAAGACTCTGTGGATGTTCGTTCCTACATTGCTCGTTCCCTCATTGATGGCTTCGAAGGCCCTTCTGGTTACAGCCAGCGGTTTGGCCTGCACCACGTCAACTTCAGCGACAGCAGCAAGTCAAGGACTCCCAGGAAATCTGCCTACTTTTTCACTAGCATCATAGAAAAGAACGGTTTCCTCACCAAGGGGGCAAAAAGACTGCTACCACCTAATACAGTAAACCTCCCCTCCAAAGTCAGAGCCTTCACTTTTCCATCTGAGGTGCCCTCCAAGGCTAAAGTCGTTTGGGAAAAGTTCTCCAGCCAACCCAAGTTCGAAAGAGATTTGTTCTACCACGGGACGTTTCGGGATGACTTTCTGTGGGGCGTGTCCTCTTCCGCTTATCAGATTGAAGGCGCGTGGGATGCCGATGGCAAAGGCCCCAGCATCTGGGATAACTTTACCCACACACCAGGGAGCAATGTGAAAGACAATGCCACTGGAGACATCGCCTGTGACAGCTATCACCAGCTGGATGCCGATCTGAATATGCTCCGAGCTTTGAAGGTGAAGGCCTACCGCTTCTCTATCTCCTGGTCTCGGATTTTCCCAACTGGGAGAAACAGCTCTATCAACAGTCATGGGGTTGATTATTACAACAGGCTGATCAATGGCTTGGTGGCAAGCAACATCTTTCCCATGGTGACATTGTTCCATTGGGACCTGCCCCAGGCCCTCCAGGATATCGGAGGCTGGGAGAATCCTGCCTTGATTGACTTGTTTGACAGCTACGCAGACTTTTGTTTCCAGACCTTTGGTGATAGAGTCAAGTTTTGGATGACTTTTAATGAGCCCATGTACCTGGCATGGCTAGGTTATGGCTCAGGGGAATTTCCCCCAGGGGTGAAGGACCCAGGCTGGGCACCATATAGGATAGCCCACGCCGTCATCAAAGCCCATGCCAGAGTCTATCACACGTACGATGAGAAATACAGGCAGGAGCAGAAGGGGGTCATCTCGCTGAGCCTCAGTACACACTGGGCAGAGCCCAAGTCACCAGGGGTCCCCAGAGATGTGGAAGCCGCTGACCGAATGCTGCAGTTCTCCCTGGGCTGGTTTGCTCACCCCATTTTTAGAAACGGAGACTATCCTGACACCATGAAGTGGAAAGTGGGGAACAGGAGTGAACTGCAGCACTTAGCCACCTCCCGCCTGCCAAGCTTCACTGAGGAAGAGAAGAGGTTCATCAGGGCGACGGCCGACGTCTTCTGCCTCAACACGTACTACTCCAGAATCGTGCAGCACAAAACACCCAGGCTAAACCCACCCTCCTACGAAGACGACCAGGAGATGGCTGAGGAGGAGGACCCTTCGTGGCCTTCCACGGCAATGAACAGAGCTGCGCCCTGGGGGACGCGAAGGCTGCTGAACTGGATCAAGGAAGAGTATGGTGACATCCCCATTTACATCACCGAAAACGGAGTGGGGCTGACCAATCCGAACACGGAGGATACTGATAGGATATTTTACCACAAAACCTACATCAATGAGGCTTTGAAAGGTGTGTGAGGGTTCAGTTCCCCTTAAAGAAATCTTCCAACATTCCCTGGGTCATATGCCTGAAATGTTTTGCCGGGATGATTTATGAACCATCAATAGATCTCTCTCTTAGGTCCCAACTCCATAATTCTTTATCTGTTTGGGGGTTATATGTTCCTTGAGAAGCTGGGGGAAAAAAGGCTATGGTTGTCTCTGCAGAGGAAAAATACAAACACTACACACACAGGGTCCCTAGCAGAATATTCATATTGTCATGGGTCCATTCATTCATTACTGAGCAAGTTCTATTTTAGGTGCTGGGAATACAGCAATGAACAAAACACAAATTTCTGCCTTCGTAGAGCTTATCCCATGCACCCCAGAACAAGAAAGTTGTGTTAATTCAAACTTCCTTTTTTTTTTTTGAGATGGAGTCTTGCTCTGTTACCCAGGCTGGAGTGCAGTGGCCACCGTGCCCGGCTAATTTTTTTGTATTTTTGGTAGAGATGGGGTTTCACCATGTTGGCCAGGCTGGTTTCAAACTCCTGACCTCAAGTGATCCACCTGCCTTTGCCTCCCAAAGTGTTAGGATTACAGGCATGAGCCACTGTGCCCAGCCTAAAAATCAACTTTTTGTTTCATTGATTTTTTTTTTTTTGAGACAGTCTCACTCTGTTGCCCAGGCTGGAGTGCAATGGTGTGATCTCGGCTCACTGCAACCTCCATCTCCCGAGTTCAAGCAATTCTATTGCCTCAGCCTCCCCAGTAGCTAGGACTACAGATGCATTGGGCTTAACTTCCTAAGTGGGCTGAAGATCTTGAATTTGGGGCATGTTTCTTTACCCCAAGAGAAGAATTTGATTTGGAGATGATCAGTCATTAATGCTTGCTGTGGGGTCATAGGTACTGGCAGAAAAGTAAGATCAACAGATGATCTGTTGGTCTCTCAGGGTGTATGTCACTTTTAGGACCACAACCTTGAATTGAGGAGCTGGCAGTGGGTTGGGTGCATGGAGGTCCTGACTCCTGGTGTCAAGCTCTCCTCTGCTCCCTTTGGGATGTTCCCAGCCTACAGGCTCGATGGTATAGACCTTCGAGGGTATGTCGCCTGGTCTCTGATGGACAACTTTGAGTGGCTAAATGGCTACACGGTCAAGTTTGGACTGTACCATGTTGATTTCAACAACACGAACAGGCCTCGCACAGCAAGAGCCTCCGCCAGGTACTACACAGAGGTCATTACCAACAACGGCATGCCACTGGCCAGGGAGGATGAGTTTCTGTACGGACGGTTTCCTGAGGGCTTCATCTGGAGTGCAGCTTCTGCTGCATATCAGGTGAGGAGTTCAGGATGGTGGGACACCAGTGACTGCACACCTGCCCTGTGCCAGGCTCTGGGCTCAGTGCTGGGAAGGGCAGATGCATGAACAGGAGATTCCAGCAGACCCATGGAGGGGCAGCATGGGGTCTGGGAGGTCCTCTTCCAGACAGCCTGGCTTCTTCACACCCCCAAATTTACCCAGACCAGCATGGTGAGGCTCGCTGTCCTGAGATGGGTGCTTGCAGGGCCAGATGGAGCAGGGGGAGCCTGGGCAGAAGTACTTGGGCAAAGACACACAAATACAGTCACGCATTGCTCACAACAGGGATTTACTCTGAGAGATGTGATGTTAGGTGAGTTCATAGTTGTGTGAACATCACACAGAGTGCACTTACACACACCTAGATAGTGTAGCTTACTACACACCTAGGCTACATGGTATAGCCTAGTGCTCCTAGGCTACAAACCTGTACAGCCTGTTACTGTACTGAATGCTGTAGGCAGTTGTAACACAGTGGTATTTGTGTATCTAAATGTATCTAAATGTAGAAAGGCACAGTGAAAATATGGCATAAAAGCTGGACAGGGTACTCACCATGAATGGAGCCTGCTGGACTGGGAGTTGCTTTGGGTGAGTCAGTGGGTGAGGGGTGAGTGAATGGGAAGGCCTAGGCATTACTGTACACTACTGTGGACTTTATGAACACTGCACACTTAAGCTACACTAAGTTTATTTTTTAAATGTTTTTCTTTCTTCAATAACACATTAACCTTAGCTTACTGTAACATTTTTACTTTATTAACTTTTTAATTCTTTAAACTTTTTTACTCTTTTGTGATAATGCTTAGCTTAAAACAAGCACATTGGTCCAGGTGTGGTGGCTCATGCCTGCAATCCCAGTGCTATGGGAGGATCGCTCGAGACGAGGAGTTCAAAACCAGCCTGGGCAAGACATGGCAAAACCCTATCTCTATAAAAAAATCAAAAAAATTAGCCAAGTGTGGTGGTGTGCTCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGCAGGAGGATCACTCAGCCTCCCCATTAGCTGGGATTACATGCATATGCCACCACACCCAATGAATTTTTTATTTTTAATTTTTTTGTAGAGATGGGGGTCTTGCTATGTTGCTAGGGCTAGTCTGAACTCCTGGCCTCAAGTGATCCTCCTCCCTCAGCCTCCCAAAGTGCTGGAATTACAGGTCTGAGCCACAGCACCCGGCCTGCATGTGCTATACTTTTATACTGCTGGCCATACGGTAGATTTATTGACATGAGCACTGCCACAAGCATGAATAATGCAGTGCTCTACAACGTTATGATGGCTGCAAAGCACTAGGCAATAGGAATGTTTCAGCTCCATTATAATCTTATGGGACCACCATCATATATCAGGTCCGTCATTGACCAAAACACTGTTCTACAGCCCATGACTATGACCAGTCCCTGTTGGACCCCTTTATTCCCCAGCTAGATCCTTCCAAGCAGGCAGTTTGCTCACATTGGCTCTCAGCATGGGCCTGAGCTTATTCTGATGTCCTAAAAACATCACACCACTACTGTATTCATGCCAAAATGAGGCTTACTCATACAGAAAATAGGATGGTTTTATTTTGTGCAAGAAAAATGGAAAAGGCAAGAGTGACTTGCCTGGCAGTTAGTGGTGGTTCCTGCTTTCCATCTTCTGTCTGCGTCTGTCTCTGTGCAAGAGTGGGCCTGCCACATGCGTAGAGAAAGCCAGGGGAGTTCCGAGACTGAGCAGGGGCCAGAGCAGGCTCCCAGAGGAGGTAACCACACTTGGCTAATTTTTTTGATTTTTTTTATAGAGATGGGGTTTTGCCATGTTGCCCAGGCTGGTTTTGAACTCCTGGGCTCAAGCGATCCTCCCATAGCGCTGGGGTTGCCGGCGTGAGCCACCACACATGGACCAATGTGCTTGTTTTAAGCTAAGCGTTATCACAAAAGAGTAAACGTCCTGACTTGAGACTCACCCAGTGAAAGAAAGGAGGCTGTGAGTGCTTGACTGAATGACTTAATAAGACCATGTCTTGTCATCTTGAGATTGAAGGTGCGTGGAGAGCAGATGGCAAAGGACTCAGCATTTGGGACACGTTTTCTCACACACCACTGAGGGTTGAGAACGATGCCATTGGAGACGTGGCCTGTGACAGTTATCACAAGATTGCTGAGGATCTGGTCACCCTGCAGAACCTGGGCGTGTCCCACTACCGTTTTTCCATCTCCTGGTCTCGCATCCTCCCTGATGGAACCACCAGGTACATCAATGAAGCGGGCCTGAACTACTACGTGAGGCTCATCGATACACTGCTGGCCGCCAGCATCCAGCCCCAGGTATGGTGGGTCCTGGCAAGGCCTTGGGAAAGTCCACATGCAGGAACCAGCAGGGCTGGGGGAGCACATTTATCATGTAAACAAAATGCTGGTTTCAAATTTAGCTTTTGTTGTTATTGTTGTTTTTGAGACGTAGTCTCGCTCTGTCACCCAGGCTGGAGTGCAGTGGCGGATCTCGTTTCTCAGCCTCCAGAGTAGCTGGGCTTATAGGCACGCATCACCATGCCCAGCTAATTTTTGTATTTTTAGTAGAGACGGGGTTTCACTGTGTTGGCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATCCACCCGCCTCGGCCTCCCAAAGTGCTGAGATTATAGGTATGAGCCACCGTGCTCGACCTCAAATTTAACTTTTTAATTGTCAGTGAATTCATATGATGCAGAATTTGAAAAGCATAAAAGAGTTTCCTTCCCAACCTTGACCTCAGCCATCTAGTTCTCCCTGAAAGCAATTTACCCTTTTCCCAAAAGAAAAAGAAAAAAATCTTTGAGCCTTTTTGTCTCATAAGTCACTCTCAGGATTTGCAGCAGACGTTGATGCTGGCACATCTAACCTAGGGCTTCCCATGGAAAGCATGACTGATGCAACAGCTGATCTGGCTTCTTCCCAGGTGACCATTTACCACTGGGACCTACCACAGACGCTCCAAGATGTAGGAGGCTGGGAGAATGAGACCATCGTGCAGCGGTTTAAGGAGTATGCAGATGTGCTCTTCCAGAGGCTGGGAGACAAGGTGAAGTTTTGGATCACGCTGAATGAGCCCTTTGTCATTGCTTACCAGGGCTATGGCTACGGAACAGCAGCTCCAGGTAAGTCCCAGCCCTGGCTCATAGGCTCTTTGAATCATGGCATTCTTAGCACATCAAGAGTTGAAATCCAGATGGCACAGGACGAGGCAACCAAGAAAGCCCCATGGCAGAAACCCTGCCTCTTGCTCAGGGCTGTAGAGGTTTCCCTCCTGGCATTGCATTGTTAAACATGAATTTCACATCTCAATTCCTCAAGGTGGGTTCATTTCTTTGCTCTGTGACTCTGATCGGCCTCAGGTGGGAAGCTCTCTGAGATCAGAGATGTGGGCAAATTGATGTGGGGAAGGACAGTTTGTCAGGCTGTACAGGTCTACAGTCTATTTCCACAATTCCCAAACCCCAAAGTTTCTGACAATTGGTTTGGCTGACCTGAGCTCATTTAATAGTAGTGAAACCTGACTCAAACCAACAGGAGGCTGCTCATGGTCTTTATCGCACTGAGCAAACATTCCTAAGTTCTGCTGCAGAGATATCAGTGTGCTTGACTATGGAGAGCTAGCTGTCTGCGGCCCATGGGGCTGATAGGTACTTTTCTCAAAGGAAACATTTAATTCCAAAACACATCTGACCCTAAAAGATTTTTTTTCTTTTAACTTTGGTTGACATGCAATAATTGTACATATGTATGGAGTACAAAATATATTTTGATACATGTATACAATGTGTGATGATGAAATCAGAATAATTAGTATATTCATCACCTCAAACATTCATCATTTCTTTGTGTTGGGAAAATTCGGAGTCCTCTTTTCTAGCTTTTTGCAAATATACACTAAATTCTTTTGTATCATTATTTTTTATTCATTTTTTATTTTTTTGAGACAGAGTCTAGCTCTGTTGGCCACGCTGGAGTGCAGTGGTATGATACCAGCTCACTGCAACCTCCACCTCCTGGGCTCAAGCAATTCTCCTGCCTCAGCCTCCCAAGTAGCTGGGATTACAGGCATGTGTCACCATGCCCAGCTAATTTTTGTATTTTTAGTAGAAATGGGGTTTCATCATGTTGGCCAGGCTGGTCTCGAACTCGTGACCTCAGGTAATCCGCCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGATGTGAGCCACTGCACCCAGCCTAAATTATTGTTAACTACATTCACTCTACAGTGCCACACATCACTAGAACTTCATCTTCTTATCTAGCTTAACTTTGTATCCGTTAACCAACCTCTCCCTGTCCTCCCTCCCTACTACCCTTCCCATCCTCTAATAATCACAATTCTATTCTCTGCTGCTAGGAGCTCATCTTTTTAGCTCCCACATGTGAGTAAGAACATGCAGTATTTGCCTTTCTGTGCCTGACTTATTTCACTTTACATAATGACCTCCAGTTCCAGCGATGTTGTCACGAATGACAAGATTCATCCTTCTTTATGGCTGTATAGTATTCCATCATATATAGATGCCACATTTTCTTTATCCATTCATCTGTTGATGGACACTTAGGTTGATTCCATATCTTGGCCATTGTAACAGTGATGCAATAAACACAGAGGTCAAGGTATTGATTTGGTATTCTGACTTACTTTCCTTTGGATAAATACCCAAAAGTAAGATTGCGGGATCCCTTAGGGTTTTGAATAAGAAATTGTGGACCTATTTTTGTGCCTTGAGAACCATCCAGTTTCCCTTTTATCTAGTTCTGAGTTAATAAGAATAAATTTGTGTTCATTCATGTGATTCCTCAGAACACTGTGTAACAGCTGGAAAGAGCCACCAGGAGATGTGCCTCAAATCGTTATCAGAGGCATCTCCATTGGTCCTAGTTCGCCCCTTTGGTTTACACTCCTTCAACACATGAGAGGCGCCTCTCTTGCAGCATTCACTATCAGGGTATCTCCGTGTCAGCTGGTTGCATTAGCTACATTCTCCAACCCAAATGATCACAATGGGTCCCCAAGGAGCTCCAGATTATCAACTCCTGTCAATTCATTTGAGCCCAGGAGTTCAAGGCTGCCGTGAGCCATGCTCATGGCACCACTGCACTCCAGCCTGGATGACAGTGCATGACCCTGTCTCAAAAATAATCATAGGCCGAGTATGGTATCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCAAGACAGGTGGATCACCTGAGGTCAGGAGTTCAAGACCAGCCTGATCAATATGGTGAAACTCCACCTCTACTAAAAATACAAAAATTAACCAGATGTGGTGGCATGCGCCTGCAGTCCCAGCTACTCAGGAGGCTGAGACAGGAGAATTGTTTGAACCCCTCGGCTCACTGCAGTCTCCACCTCCTGGATTCGAGTGATTCTCCTGCCTCAGCCTCCTGAGTAGCTGGGATTACAGGCGCCTGCCACCACAGCTGGCTAATTTTTTGTATTTTTAGTAGAGACAGGTTTCACCATATTGGGCAAGTTGGTCTCAAACTCCTGACCTCAGGTGATCCACCTGCCTCAGCCTCCCAAAGTGCTGGGATTATGGGTGTGAGCCACCCTGCCTGGCCTGGGTTGTCTTTTAAGTTTTAAAAAATCATTTGTACATATAGTCCTTTCTGAATATAACATTGGTTTGAAGCTAACTCAATAGAAAGAAATTCCATCTCTTTAGTCTCTCTAGTTTGACTATAGTAGCTAACAGAGCTGGGCTGCAATGTCACACTTCACCTTTAGGTGCCACCATCTGATTTGATTTGCTCCAGCCTAAGCATTTCTTAAATTGGGGGCTTTAGATTCTGCGATAGGTATCTCTATTATGGGTATTTTGCCTGAGACGTACGAGCCACTCTTCTGGAGGAGGGGAACAGAAGCCAACATTTATCAAGTGGCCTTCCCTCTTCCAGACACAGTGCTGCTGTGGGCGGGTTTCATACACTGTTTCATTTCCCTCTCACAAGCACTCTCTGCTGCCCATTTCACCAAGGAGGAAACTGAAAGGCTGAATTACTCACCCAAAGTTACACAGCCAGGTAATAAGAGGGAGAGCCAGGGTTTGAATTCCAGTTTTCCTAGTGTCAGAATCTACACCCCTTCTCCCAAACACAGAGGGGACATCTGCAGGCAGGGACTAACAATCTCAGTCACATCCATTCTATCCATTCTCTGTTGACATCCATGTGTATGTTTCCAGGAGTCTCCAATAGGCCTGGCACTGCCCCCTACATTGTTGGCCACAATCTAATAAAGGCTCATGCTGAGGCCTGGCATCTGTACAACGATGTGTACCGCGCCAGTCAAGGTGGCGTGATTTCCATCACCATCAGCAGTGACTGGGCTGAACCCAGAGATCCCTCTAACCAGGAGGATGTGGAGGCAGCCAGGAGATATGTTCAGGTCTGTTTTTCCTCTGGGCGCTTGTTCTTACTCTTGTCCATCTTTCCTTCCAGCCTAATGGAAACAGATTGTGAATCAAAGATTTAGGGTGTGGTGGCTCATGTCTATAATCCCAGCACTTTGGAAGTCTGAGGCAGGAGGATTTCTTGAGGCTAGGAGTTTGAGACTAGCCTGGGCAACATAGAAAGACCTTGTCTGTGAAGAAAAAAAAATTATTAGGGCATGGTGACACACACCTATAGTCCCAGCTGCTCAGGAGGCTAAGGTGGGAGCTTGAGTCCAGGAGGTCAAGGTTGCAGTGAGCCATGATTTCGGCACCATTAAACTCCAGGTGACAGAGGGAGACCACCCCCAACCCCCAAAAAGAAGAATTATTAGGATCTAAGCCTAGTTTTGCTATTGCCTGATTTTGTGAGCTTGGGAAAATCATCTAACTTTTTCGTACCTTAGTTTCCTCAACTTCAAACTGGAGATGTCCTGCCCACCTCAAAAGTTTGCTGTGATGAAAGTGATAAAGTCTGTGGGGTTAAGTTGTCTTTAAGTTTTTAAAAATCATTTGTACAGGTAATTCTTTCTGTAACATTGGTTTGAAGTTAACTCCTAGAGAGAAATTTCCATTTCTTCAGTCTCTCTGGTTTGACTATAGTAGCTAACAGAGCTGGGCTGCAGTGTCACACGTCACCTTTAGGTGTCACCATCTAATTTGATTTGCTCCAGCCTAGGCATTTCTTAAATTGGGGGCTTTAGATTCTGTGAAACTCATTAGTGCCCCTAATTTGCCAACTCAATGAGTTTAAGAACAGCATCTTTCAGTCATGGAATTTCACCCAAGGAAGTAGAGACATTACTAGGAAGCCGACTGGGCAAGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCTAAGGCAGGCTGATAACCTGAGGTCAGGAGTTCTAGACCAGCCTGGCCAACATGGTGAGACCCCATCTCTACTAAAAATATAAAAATTACCGAGGTGTGGTGGCACATGCCTGTAGTTCCAACTACTCTGGAGGCTAAGGCAGGAGAACCCAGAAGCTGAGATCACCATTGCACTACAGCCTGAGCAATGGAGAGAGACTCCATCTCCAAAAAAAAAAAAAAAGGAAGCAGGTGCCAGCATGGCCTACCTTTATTTCCACCTGAACCCCTGACTGACAAGAGGAGAGAAGGAAAAGAATGATCCTCGTTCACGGCACAGCTCCTCTGGCTCCTCCTAGAAAGGGATAGAAGGTGCTCTCTCCTACATTTTGCATCAGGAAGGAAAGGCAAGGGTTTCTCTCTGGATTGGTTCCCACTTTGGGCTCCGTGGAAGAGTCTGCCAACTGTGGCACTATAACTCTCTCGGGGGAAACACCATGGGCCTTGGTTTGCCAAACTCTTCACAAGAGAACTTGACAAAGATAGTCAAATTAATAAATGCAACGAAGGGCTTAATTTTCTGGTCTGCTAGCCAGCTGGGGAAACAGTTTTGAAACAAAATTAATGTATCCCTCATTTCAGACTCACAGAACTGGTGATTTAGAACAACATTTCCTTTTTTCCTGAAATCAAAACTTCAAACGTCAGGAAACGTTAGGCTGTCATATCATTAGATTGAGCCACTTACTGGTGAACTGTGATACTGCAGCCATGGACTATGACAGATGCATCTCTTATTAGAATCAGCTTCCACGAAAATATCTAGTGTGTTACCTTGGAGTGGGTGCATTTAAGCCTCTTTACCAGGGCATGTGTGTTCCATGCCTCATCTCTGACCATGAGTGGAAGCTGCCCTCACCCCACATTCCCAGCAGCAGCCAGTGGGTGAGGAGAGGATTTGGAGAGACTACAGAAGGCTCTCAAACCCCCAGCATCATAGTGTTTGGGTGCCACGTATCCCCGCACATGATAAGGATGGCCTAGGCCTCATTAGAGAAAATAGGTCAAACGGTTCCCCACTCTGGGGCTGTGAAAGCCCATTAAGAGTTATACTTTGCCCGGAAAGTCAGGAAAGCCTTGCATTTCAGCCCTCCCCAGAAAGTTCTGTTTGCTCCTTGGGCACTTTGATGAGGAAGTGCAGCCGCTCAGTGGTGAGAAAGAGAACCGGGAGCCTGGAGGCTTGTGGGTTCTCCCTTCTCCTCTGCAGGACCCATCCAAGGGCAGCCAGGAAGCATTTGTTGAGCACCAGGGCCAGTGCCCAGGTCAGTTACTGTCACCTCAAGCTACAACTATGAAGAGGTGGCCCTGTCCTCAGTGCAACAGCAACTGAGCAAGCACAGACCCAGCTGGGAGACCTGTGGACACTGAAGCTGCAACAGGCAGGAGCCACAGGGAAGAGAGCTTAACCTGGGGGTCCAGGAAAAGGAGGGAGGTTGTGAGGCCAGGCACTTCTGAGCGGGGACGATGCTGTCTCTTGCTGTGCCCACCTGTAACGCCTGGGCTGTCTCCCCCACCCTACCCGCAGTTCATGGGAGGCTGGTTTGCACATCCTATTTTCAAGAATGGAGATTACAATGAGGTGATGAAGACGCGGATCCGTGACAGGAGCTTGGCTGCAGGCCTCAACAAGTCTCGGTAAGGGCCTGGTGCCCGCAGTGGTGAGGGCATGGGCGTCGGGGTCACAGAGGCATGTCTCGGGCAAGTTACCATGACTGAGCTTCTTTGCTTGCTGGGAAGTTACATGTGATCATATATGTAAAGCTTAGCCCAGCACCTGGCACTGCTATATGCTTGACAGAGGGGTTTGCTATTATGGTTATTATATTGTGGAACAGCCAGGAAATCTGTTCCAAGGCTATCCACCTCTGAGCAACCCCACTATGCCACTCCACAGACAAATCCCAGCTCATCCTATCCTATTTCATCACCGCACGTGTGAATGTGGGTGGCAGTGGCTGACAGTCAGGCGTGTGCTTTGCATCACTTGGCCAGCAGGTGGCGATGGTCTGCACCAAAGCGTCCTTCCTTACCCGCCCGCCCCTTCTTGTCAAGGAAGGAGTAGCAACATTAGCAGCCAACGTTTACCCATCTTCTGTTGGTATCTTTATTTCTATTTAAATACGAGTAAAGTGAGGCGCAAAGATTAGGGCAAGATCACCCAGCTATTAAGAGATAGAGCTGAGATTTGCAGCCAAGCTGTCGGCCCCCAGGGCGCATGCTGGGGGCTTGAGAATCATGCAGGTGGGGTGAGAGGTGTGGTGTGAGGAAGCGCAGGAAGCGGCAGTCACTGCCCACGGGAACATGCCCACCGCTCCAAGGCTGGCGGGAATCAAGTGTGAATTCCCAACATAGTCTTTGAGTCCTGCCCCTAGACGCCCCCATATAGTCCAGCTGAGGATTAAAAATCCAGCTCCCAGGCCGGGTGCGGTGGCTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGGCGGATCACTTGAAGTCATGAGTTCAAGACCAGGCTGGCCAACATGGCGAAACCCTGTCTCTACTAAAAATACAAAAATTAGCTGGGTGCGGTGGCGGGGGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCCAGAGAATTGCTTGAGCTCGGGAGGCAGAGGTTGCAGTGAGCCGAGATCATGCCACTGCACTCCAGCCTGGACGATGGAGCAAGACTCTTTGTCTCAAAAAAAAAAAAAAAAAAAAAATCCAGCTCCCACCTCACAATGCACACCCTGGCCTAGTTAGGGTAAGTTCCCCAGAGCAGGCTGTGCCTAAAGGTCTGATCTGCGAACTGTGATCTCCAGCAAAACCGATGAGCGCCCGGCCCATCTGCACAACTTTGCCGAGCACCTACCATTTGCCCGAGCCAGGGCCAGGGGGAGGGGTCCATGGAAGGAGAGGATTCCATTCGGCTCCTCAAGCTGCTGACAGATCAGCTGGAAAGACAGTTCTCCATCCCATTCCCCAGGCTCTTCAGTACAAAGAAAACTCAGAAGATGAGATCAGGGTAGGCTGGGTAGACCCAGGAGATTTCATGGACAAGGCAGGACATCAGCTGAGCTGGGCCTTGACAGGGGATAGAATGTGGGGGATTGCAGGTCATTCTTTCCACGGGGCAGAGTTGTGAAGACAGGCCTAGAGGTAAGAATGAGTGGGGGTGCCAGAGCTGAGAGTGGGGAGACCAGCCCAGCAGCAGTGAAGGGGCTGAGGCCACATGGTGGAGGGCCTGGAAGCCAAGCACAGGGCCTTGGCTTTGGTGCCACAGGCCATTGCTGGCTTCTGAGCAGAAGCGTCAGGGACAGGAGGAGGCTGTGCTTTGGTTTTCTCCCATCATCTCAGAAACCCCAGGGAAGGAAGGTTCTTGATCCCTCAGGTTAATGGTAGCAGAAAGGTCAAGATGAAAGTGTCAGAGGCCTCAGCAGGGCATCTCTGGGCCACACCTAAACCAGAAGCCCCAGGTCTCCCTGTGGGCATGCGCATGCATTCAGGATCACCTGTACATGCTGCACACAAGTGTATTAGTCAGGTTCTACAGGAGAGGCTTTTCACTCAGAGTGCCTGGAAAATGCTCCATCTACAGGTTAAGAAGCCAAAGTTCCCAGGCTCACAAGTAATCTCAGCACTTTGGGAGGCTGAGGCAGGAGGATTGCTTGAGACCAGGAGTTCAAGACCAGCCTGGGCAATATAGTGAGACTTTGTCTTTACAAAAAATTTAAAAATTATCTAGGCATGGTGACGCACACCTATATTCCCAACTACTTGGGAGGCTGAGGTGAGAGGATTGCTTGAGCCCAGGAGACTGAGGTTGCAGTGAGCTATGATTGCACCACTGCACTCCAGCCTCAGGGGGCGTGCAGTGTCCTCAGGAGGACAGGGCGAGACCCTGTCTGGGAAAAAAAAAAGGAAGAAAGAAAACAAAGTTCAGTGAAATCGTGAGAACTGAGAAATTCAAACTCAAATACTCAATTTGCAAATTATTTCCCCTGCTACTTTGAAGATCTTGAAGCTATTATTATTATTATTATTATTATTATTATTATTAGAGTTGGAGAAAAGAGGCATGCCTAAACAAACCCCAAGAGGAGCAGAGAAACTTGGAAAGGGCAGAAAACGGAAATACAAATTACACAAGATACAGGGCCGGGTGTGGTGGCTCATGCCTGTAATCCCAACACTTTAGGAGGCTGAGGTGGGCGGATCACTTGAGGTCAGGAGTTCAAGACCAGCCTGGCCAACATGGTGAAACCCCATCTCTACCAAAAATATAAAAAATCAGCCAGGAGTTGTGGCAGGCGCTTGTAGTCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTGAACCCAGGAGGCGGAGGTTGCAGTGAGCTGAGATCGCGCCACTGCACTCCAGCCTAGGTGACAGAGCCAGACTCAGCCACAAAAAAAAAAAATTATACAAGATCTAACTTTACAAACATAGGGGAAGAGTCTCAGATATGTACCTCAGGCATGCTCTTGATTTTGAAATCTGTCATAGATGCTAAATATCAGAATTTGCATATGTTGTTTGAAGGAAATTAAAAACTGTTTATGCATGTGTGTGTGTGTGTGTGTGTGTGCGCGCGCGCGCGTGGGTGCGTGTGTGTGTGGTCCTTCAGGTTTCAGAGGCTCTTGTAGAGACCTAGGGAAGCCAGGTTTATGGTCTGGCTGGAGGAGCCGCAGTGCTTCCTGCACCGCCTGAAGGACCTCTTCTCCTTCCCTGCCAGTTTCCCCTACTCACTGCCAAGTGCCTGGGGTTCGGAGAGCTCCCCCGCCCCAGCCCTATGACGTTCTCTTCTCAAAGCATGGCTCTCCCTGAAGCCCTGCCCTCAATGGCTTCTTCCACCCAGGCTGCCAGAATTTACAGAGAGTGAGAAGAGGAGGATCAACGGCACCTATGACTTTTTTGGGTTCAATCACTACACCACTGTCCTCGCCTACAACCTCAACTATGCCACTGCCATCTCTTCTTTTGATGCAGACAGGTAAGTCCACCAACAGGGAGCCCGGAGCCATCTCTGGAAAAGGCAGAAAGGTGCCCTTGGCTGTGCTCTCTGAGGCCTGCCCAGCCTCAAAACAGGGCCTCCAACGCCGAATGTGCCGGGGTTTGCAAGATGCTCGCCACCCCACAGATCCCAAGGAAATCTCCAGCAAGCAAGAGGGAGAGAAGGCCGGGGTCTCCAGGGCACCTCCCATTAGGGCACATGTGCTTGGCTTTTTGTTTTTTTATTTCATTTTTAAAATTTATTTATTTTGCGTTCTTTTTCACAGGCTGCTTAACTCTAAAAATGCTTGGTTGTTAAAAAGGATTTTCAAGAAAAAAATTGTTTAGGTTTTATCAAAATTCGCTTTAATATTAATAAATATTACAATAATACAAGAGTAAAATTTTATTTTCTCTTTTTAAACAATGCTTTTATGAAGTAGAGATAGTAAAAGATTTTGGTTCACCTTTTAAGTAACAGTTTTTATTTTATTTATTTTTTTTGAGACAGAGTCTCGCTCTGTCACCAGGCTGGAGTGCAGTGGCACAATCTTGGCTCACTGCAACCTCCACCACCCGGGTTCCAGTGATTCCCCTGCCTCAGCCTCCAAGTAGCTGGGATTACAGGCGCGTACCTCCACGCCTGGCTAATTTTTTTTTTTTTTTTTTTGTATTTTAGCAGAGACGGGGTTTCATCATGTTGGCCAGAATGGTCTCGGCTAATTTTGTGTTTTTAGTAGAGACAGGGTTTCTCCATGTTGGTCAGGCTGGTCTCTATCTCCTGACCTCAGATGATCCCCTTGCCTTGGCCTCCCAAAGTGCTGGGATTACAGGTGTGAGCCACTGCGCCCAGCCGATTGTCTTTTTCTTTATGATATCTATTTATCTAGAGACTTTTTCATCCATATCCTGTATTGTTTTTTAAATTTCTCTAAGTTAATTTTCACCTTTCTCTAGTACCTCCTTGAGTAGTTTAATAATCAACCTTCTGAATTCTTTATCTGGCAATTCAGAGATTTCTTCTTGGTTTGGATCCATAGCTGGAGAGTTAGTGCGACCTTTTGGGGGTCTTATAGAACCTTGTTTTGTCATATTACCAGAATTAACATTTCTGGTTCCTTCTCATTCAGGTAGACTGTTTCAGTGGAAAGATCTAGAACTCAAGGGCTGCCCCTCAGATCCTTTTGTCCCACAGGGTGATCCCTTGATGTGGTGCTCACCCCCTTTCCCTAAGGATGGGGCTTCCTGAGAGCCAGACTGCAGTGATTGCCATTGCCCTTCTGGGTCTATCCACCCAGCAGAATTACTGGGATCTGAACTGGTGCTGAGGAATGTCTGCAAAGAGTCCTGTGATGTGATCTGTCTTCAGGTCTCCCAGCCATGGATACCAGCACCTGCTCCAGTGCAGGTGGCAGGGGACTTAAGTGGATTCTGTGAGGGTCCTTGGTTGTAGTTTTCTTTCGTGCACTGGTTTTCTCAAACGCTGGTTATGCGAGCAGTGAAGTTGTCATGTGGACAGACTCAGGACCTCTGGTTAGCCAGGATGTTGCAGGCAGTGGAATTAGTTGTTGTTTTCTCTTTCTTTGGAGCAGGGTTCTTCTGTTATGAGTTGCTATAATGGCGTGAGTTGGTTGGTCTCCAGCCAGGAGGTGGCACTTTCAAGAGAGGAGGTGGCACTTTTGAGGTGCTTTCCCCTTCCACTGCCACCAGCTGCGGTAGTAGAAGGGGGATTACAAGGTTGCCCTACGTTGGCCAGGGTATTTGGGTTTCTCAGGTTATGGGCGGGGCCATAGAGCTCCCAAGAGTTTATGTCTTTTGTCTTCAGCTACCAGCAAAGGTAGAGAAAGACCATCAGGTTGGGGCAGCATTAGGCATATCTGAGCTCAGACTCTCCTCGGGTGAGGCTTGCTGTGGCCACTGTGGGGGAATAGGGGGCTGGTTCTCAGGCCAGTGGAGTTCTGTTCCCAGGGGTATTATGGCTTCCTCTGCTGTGTCATACAGGTTGCTAGGGAAGTTGGGAAAGCTGGCAGTCACAAGCCTCACCTAGCTCCCATGCAGCCAGCAAGGCCAGTCTCACTCCTGCTGTGCCCTGCCAACAGTGCGCAGGGCAGAGATCTCACCCCAGGCCACAAGCCTTCTCTCTGAGAAAGCAAACAGGGCTTTCAGGCCTTGCCCCTCCCCACCTGCCCACACTGTTGGCTGTGGCTTCTGTGCTTTTATCTGCACTTCTTGTTTGCCCCCAAGATTCTGCTCAGGAAAATTCATGCTTCTAGTTGAAATTAATAAGTTGAGCTAGAAGCTTCCTTAATCCTGCCCCTCCCTAATTCCACTGGCTTCCTTCTACAAGGACCCCTGTAGGATAAAGTCAGGAATGGCTTCCCTGGGCTCAAGCTGGGGACAGGGAGTGGCTACAGGGCTCTTCCCACTGCTTCTTCTACTTTTACATTTCACTTGGCTCCCTAAATCCACTTGCGCTCTAGGTAAGGTTAAATCCTTCTCCTATGATCTGGATGTTCAGGTCCCCAGAGAGGATGTGTGTTCAGAGGCAGACTTTCCCCTTCTCACACTTTGGGAACTCACAGTTTTGCAGCTGTCTCATGGAGTTTGCAGTAGCATTCTGCTTCTTTCAAAGGTCTGTGAATTCTTTTGGTTTTTCTGGTATGTTCCAGCAGTAGTTCTTGGAGCAACAGTTAACAATGTGAGTCTCCACACACTGTTCTGTCCATCCAAGTGGGAGCTGTACCTAAGTCCTGTCTCTTATCTGCCATTTTCAGAAAATCTCATTTTGTTCCTTTTCCATCAAGCATCTTTCCACACCAGTAGCTCTGACTGTGTCCAGGTTCTTTCTTTGAAAACCAGCCCCTCCACTCCCACTGTATCCTGAAACCCCCAGAGTCAGGCTCACCAAGGGGGCTGAGGAGACTCCAGAACACCCCTCTACTCCTTCTGATTCTCCCTCCCTCTTATGAGCCCTGGGGCTGTCCCTGGGAAGACCTCCACTAGTGCTTATGAAATGGCCCACATTCAATTCAGCTGAGTCAGATAAATCCAGGTGCTGATAAAACCAACATTTTTTTTTTCCCTGAAAGATCTGAGTGTAGGAAGTGTCTAAATTCTTGGTTCTTATTTGACATATGTGACTACAGGTGATCAGGCTTCTGTGGCCTCCCCCAGCAGGCACCCCTGTGGCAGGAGCTGTCCACCTCCTCGTCCATCACTGAAGAGAGAGGCCAGGCTCTTGGACACTTGAAGGAGGTGCCAGCCTGTCTGCTGCCTGTCTTCACACAAAGCAGGGCCCCTACAAATATTCGTGCCTGTGGCCTCACCCGTGGTCTGCAAGTGGCATTGACTAGACATGGGTGACTTGGGGGAGCCCATTCTCTGAAGACACAGCACTGGCGTCCATTGGAAGCCTGGAGCTGGGGACCCATAGCCTTTCATAGCTGGTGTAAGGAGCCATCTATTCAGAGTAGTGGAGGGCTGGGCTCCATGAGCAAGACCATTCTCTGCACAGGCAGAGAAATTGAAGGACGGAGAGCTGAGTGGCTTGTCCAATGCTATACAAGCACAGAGTGCAGGCAGCTAGAACCAGCGAAAGGAGGAGAATTTGAGGCAATTTGATTCTTCTGGGTTTAGTATGATTCTTAAGGCTAGACAAAACCTAAGGCTTTTCCTGGTCTAAGTCCTGTTTTGTGTTTCGTGTAAAGGGCCTCTTGAGATTTACAAAACAAGACCTCATCTTTAGTTTTTGAACTTGTAGAGGAGTTGCTTCCATCGCAGATCGCTCGTGGCCAGACTCTGGCTCCTTCTGGCTGAAGATGACGCCTTTTGGCTTCAGGAGGATCCTGAACTGGTTAAAGGAGGAATACAATGACCCTCCAATTTATGTCACAGAGAATGGAGTGTCCCAGCGGGAAGAAACAGACCTCAATGACACTGCAAGGATCTACTACCTTCGGACTTACATCAATGAGGCCCTCAAAGGTACGACGGGCCCGCCCTTCCCCAGCGTGCACCTTCCCCTGTTGGAAACATCTGCTATTTGCAGGAGTGTGATACAGCGTCCTTGGGCCACAGTAAGAAGAGAACGATAAGCCTTCTTGCTCCTGTGGGGGTTAAGCTTTGCTTTAGGGCCGCACTCTGAGTTTTTTTCCTTCCTATGTCTAAGCTCAGCAAGCAAGCCCAGTGGCCTGATGCCACCAAACCCTAGTTCCTGGCCTGAGTCACTTGCTCCCCTTGATCCTCCCTGATGTCGGAACAGCAGTGCCCTCCTGAGAATGGTCTGGGTAGGGCCCAGGAGGAGTAGGCTGTGTGGTTGCCTGTAGGGTCTCTTAGTCTCCGAGCAAAAGTTTGAGGCTCTTGGCTCAGCCCTAACAACTTGAGATGGGACCATGAGGATAAGAGAATGGAACAAATGCTGAGTAAAAGAGCAAGCAATGCACCAGCAAAGGAAATGATTAGAAAAATCCTACCCAGCTGAAATATATTCTATCTAAGGGCTATTTTCTACAGGGAGAAGCATTAACATTCTAAGGCACTGACCAAGATCATGGATAGAAGTGTAGGTGACCCAGGGAAATTAGTCCACCCATGTTCCCGACTCCCATTCAGGGCTGAGCCTTTCTAAAGATGCTGGTGGCTCTACATCTCCATCCACAGAGGACCCGCTAACAGTCACTGCATGCAAAGAAAGGGGCAGCTTTCTCACAGAGCTAAAGCCAAAGCTTACCGCCATCTGTGAAGGCGGTGGGGAGAGCAGTAGGCAGACCTGGCTTCCGAAGGCCTGGCAGGAAGCTGCCTTATGAGATAGGAAACTTATGCCTAGTCCTCTGTGTTTGAGCAGCTGTGCAGGACAAGGTGGACCTTCGAGGATACACAGTTTGGAGTGCGATGGACAATTTTGAGTGGGCCACAGGCTTTTCAGAGAGATTTGGTCTGCATTTTGTGAACTACAGTGACCCTTCTCTGCCAAGGATCCCCAAAGCATCAGCGAAGTTCTACGCCTCTGTGGTCCGATGCAATGGCTTCCCTGACCCCGCTACAGGGCCTCACGCTTGTCTCCACCAGCCAGGTGAGATGTGGCTCTGGGAGGGAATAAAGCCTAAGGGTGAAGGGCAGGTGGAAGGGCCTCTCTCAGTCTGTTTTCTTCTAGTTTATTACCCTCCTCTCTGCTGCCCCATCTCCTTCATTCATTCAACAAATGTTTATTTTCTTTGAATGCTCCAGACATTTCCTGGGGTGGAACAAAGGACACTTAGGGTCAATGTTACACCTAGAAACCAGAGCCCTGACCTGGGAATAAGCAGGTCTAGACTCTTCTTCCAGCCCTACCACTAACTTGCTGTGCACTCATAGATAAGAAACCAGGCACATGCTTCAGGGCAGTTATAAGGATCAAATAAAGGGACCATGAAAGTACAATGCCTGCACCATGTGGGTGCCCCGTATTTGTTAAATGTAAGAGTATAAATCACTACACAAATGTAAATGACAATTATCCAGTTTCAATTATACAGGCTGAGAATCCTTCATCTGAAAATTCAAAATCCAAAATACTCCAAAATCTGAAACTTTTTGAGCATGACTGGCATGACAACACAAATGGAAAATTCAACACCTGACCCCATGTGACAGGTCGCAGTCAAAACACAGTTAAAGCTTTGTTTCATGCACACAATTATTTAACATATTGTATAGAATTACCTTCACACCACGTGGATAAGTTGCATATAAAACATATATGAATTTCATATTTAGACCTGGGTCCCATCCCCAAAATATCTCATTATGTATATGCAAATATTCCAAAATCTTAAAAAATCCCAAATCTGAAACACTTCTGATCCCAAGCAAGCATGTCAGATAAGGGATATTCAACCGTATTCTAATTGCTGATAGAGATGTTTAAAAAATACAACTGCTTAACTTCACTCAGAGGCTCCTGACACACAGTGAAAGGGGATGGGTACCTCCACCTCGGCATCCCGTCAATGGCTGTGCGGATTTGGGGTACCGTGCAGCCTCTGCTGGGGTCTCAGAGATCTGAGAACTCAAATCAGCGCCAGAGCACCAACCACCCTGTGTCACACTCTCCTAGATGCTGGACCCACCATCAGCCCCGTGAGACAGGAGGAGGTGCAGTTCCTGGGGCTAATGCTCGGCACCACAGAAGCACAGACAGCTTTGTACGTTCTCTTTTCTCTTGTGCTTCTTGGAGTCTGTGGCTTGGCATTTCTGTCATACAAGTACTGCAAGCGCTCTAAGCAAGGGAAAACACAACGAAGCCAACAGGAATTGAGCCCGGTGTCTTCATTCTGATGAGTTACCACCTCAAGTTCTATGAAGCAGGCCTAGTTTCTTCATCTATGTTTACCGGCCACCAAACACCTTAGGGTCTTAGACTCTGCTGATACTGGACTTCTCCATAAAGTCCTGCTGCACCGTTAGAGATGACTTTAATCTTGAATGATTTCGACTTGCTGAGTAAAATGGAAATATCTCCATCTTGCTCCAGTATCAGAGTTCATTTGGGCATTTGAGAAGCAAGTAGCTCTTGCGGAAACGTGTAGATACTGGTCTAGTGGGTCTGTGAACCACTTAATTGAACTTAACAGGGCTGTTTTAAGTTTCAGAGTTGTTAAGGGTTGTTAAGGGAGCAAAAACCGTAAAAATCCTTCCTATAAGAAGAAATCAACTCCATTGCATAGACTGCAATATCATCTCCTGCCCTTCTGCAAGCTCTCCCTAGCTTCACATCTTGTGTTTTCCAGAAAATAAAAACAGCAGACTGTCCTTTCTCCTA', u'desc': u'chromosome:GRCh38:2:135787840:135837180:-1'}\n" ] } ], "source": [ "lct_seq = do_request(ensembl_server, 'sequence/id', lct_id)\n", "print(lct_seq)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Vega gene\n", "{u'display_id': u'OTTHUMG00000131738', u'description': None, u'db_display_name': u'Vega gene', u'info_text': u'Added during ensembl-vega production', u'info_type': u'NONE', u'primary_id': u'OTTHUMG00000131738', u'synonyms': [], u'version': u'2', u'dbname': u'Vega_gene'}\n", "Vega gene\n", "{u'display_id': u'LCT', u'description': None, u'db_display_name': u'Vega gene', u'info_text': u'', u'info_type': u'NONE', u'primary_id': u'OTTHUMG00000131738', u'synonyms': [], u'version': u'1', u'dbname': u'Vega_gene'}\n", "Havana gene\n", "{u'display_id': u'OTTHUMG00000131738', u'description': None, u'db_display_name': u'Havana gene', u'info_text': u'', u'info_type': u'NONE', u'primary_id': u'OTTHUMG00000131738', u'synonyms': [], u'version': u'0', u'dbname': u'OTTG'}\n", "Expression Atlas\n", "{u'display_id': u'ENSG00000115850', u'description': u'', u'db_display_name': u'Expression Atlas', u'info_text': u'', u'info_type': u'DIRECT', u'primary_id': u'ENSG00000115850', u'synonyms': [], u'version': u'0', u'dbname': u'ArrayExpress'}\n", "EntrezGene\n", "{u'display_id': u'LCT', u'description': u'lactase', u'db_display_name': u'EntrezGene', u'info_text': u'', u'info_type': u'DEPENDENT', u'primary_id': u'3938', u'synonyms': [u'LAC', u'LPH', u'LPH1'], u'version': u'0', u'dbname': u'EntrezGene'}\n", "HGNC Symbol\n", "{u'display_id': u'LCT', u'description': u'lactase', u'db_display_name': u'HGNC Symbol', u'info_text': u'Generated via ensembl_manual', u'info_type': u'DIRECT', u'primary_id': u'HGNC:6530', u'synonyms': [], u'version': u'0', u'dbname': u'HGNC'}\n", "MIM gene\n", "{u'display_id': u' LACTASE; LCT [*603202]', u'description': u' LACTASE; LCT\\n;;LAC;;\\nLACTASE-PHLORIZIN HYDROLASE; LPH\\n', u'db_display_name': u'MIM gene', u'info_text': u'', u'info_type': u'DEPENDENT', u'primary_id': u'603202', u'synonyms': [], u'version': u'0', u'dbname': u'MIM_GENE'}\n", "MIM disease\n", "{u'display_id': u' LACTASE DEFICIENCY, CONGENITAL [#223000]', u'description': u' LACTASE DEFICIENCY, CONGENITAL\\n;;ALACTASIA, CONGENITAL;;\\nDISACCHARIDE INTOLERANCE II\\n', u'db_display_name': u'MIM disease', u'info_text': u'', u'info_type': u'DEPENDENT', u'primary_id': u'223000', u'synonyms': [u'150220'], u'version': u'0', u'dbname': u'MIM_MORBID'}\n", "UniGene\n", "{u'ensembl_start': 1, u'xref_start': 1, u'display_id': u'Hs.551506', u'score': 31370, u'db_display_name': u'UniGene', u'xref_end': 6274, u'evalue': None, u'info_text': u'', u'info_type': u'SEQUENCE_MATCH', u'ensembl_end': 6274, u'primary_id': u'Hs.551506', u'ensembl_identity': 99, u'synonyms': [], u'version': u'0', u'cigar_line': u'6274M', u'xref_identity': 100, u'dbname': u'UniGene', u'description': u'Lactase'}\n", "UniProtKB Gene Name\n", "{u'display_id': u'LCT', u'description': u'', u'db_display_name': u'UniProtKB Gene Name', u'info_text': u'', u'info_type': u'DEPENDENT', u'primary_id': u'LCT', u'synonyms': [u'LPH'], u'version': u'0', u'dbname': u'Uniprot_gn'}\n", "WikiGene\n", "{u'display_id': u'LCT', u'description': u'lactase', u'db_display_name': u'WikiGene', u'info_text': u'', u'info_type': u'DEPENDENT', u'primary_id': u'3938', u'synonyms': [], u'version': u'0', u'dbname': u'WikiGene'}\n" ] } ], "source": [ "lct_xrefs = do_request(ensembl_server, 'xrefs/id', lct_id)\n", "for xref in lct_xrefs:\n", " print(xref['db_display_name'])\n", " print(xref)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(u'ENSG00000115850', [{u'display_id': u'GO:0000016', u'description': u'lactase activity', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'TAS'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0000016', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0004553', u'description': u'hydrolase activity, hydrolyzing O-glycosyl compounds', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'IEA'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0004553', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0005886', u'description': u'plasma membrane', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'TAS'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0005886', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0005887', u'description': u'integral component of plasma membrane', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'TAS'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0005887', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0005975', u'description': u'carbohydrate metabolic process', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'TAS', u'IEA'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0005975', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0009405', u'description': u'pathogenesis', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'TAS'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0009405', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0016020', u'description': u'membrane', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'TAS'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0016020', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0016324', u'description': u'apical plasma membrane', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'IEA'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0016324', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0017042', u'description': u'glycosylceramidase activity', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'IEA'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0017042', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0044245', u'description': u'polysaccharide digestion', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'TAS'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0044245', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0044281', u'description': u'small molecule metabolic process', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'TAS'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0044281', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0004553', u'description': u'hydrolase activity, hydrolyzing O-glycosyl compounds', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'IEA'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0004553', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}, {u'display_id': u'GO:0005975', u'description': u'carbohydrate metabolic process', u'db_display_name': u'GO', u'info_text': u'Generated via main', u'linkage_types': [u'IEA'], u'info_type': u'DEPENDENT', u'primary_id': u'GO:0005975', u'synonyms': [], u'version': u'0', u'dbname': u'GO'}])\n" ] } ], "source": [ "refs = do_request(ensembl_server, 'xrefs/id', lct_id, external_db='GO', all_levels='1')\n", "print(lct_id, refs)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pan_troglodytes\n", "gorilla_gorilla\n", "pongo_abelii\n", "nomascus_leucogenys\n", "papio_anubis\n", "macaca_mulatta\n", "chlorocebus_sabaeus\n", "callithrix_jacchus\n", "tarsius_syrichta\n", "otolemur_garnettii\n", "microcebus_murinus\n", "ictidomys_tridecemlineatus\n", "rattus_norvegicus\n", "oryctolagus_cuniculus\n", "oryctolagus_cuniculus\n", "ochotona_princeps\n", "dipodomys_ordii\n", "cavia_porcellus\n", "mus_musculus\n", "oryctolagus_cuniculus\n", "myotis_lucifugus\n", "ailuropoda_melanoleuca\n", "felis_catus\n", "canis_familiaris\n", "sus_scrofa\n", "sorex_araneus\n", "ovis_aries\n", "bos_taurus\n", "tursiops_truncatus\n", "sus_scrofa\n", "pteropus_vampyrus\n", "mustela_putorius_furo\n", "erinaceus_europaeus\n", "vicugna_pacos\n", "equus_caballus\n", "{u'taxonomy_level': u'Boreoeutheria', u'target': {u'perc_pos': 92, u'id': u'ENSECAG00000018594', u'protein_id': u'ENSECAP00000016483', u'perc_id': 86, u'cigar_line': u'329MD258MD1264MD76M', u'taxon_id': 9796, u'species': u'equus_caballus'}, u'source': {u'perc_pos': 92, u'id': u'ENSG00000115850', u'protein_id': u'ENSP00000264162', u'perc_id': 86, u'cigar_line': u'351M2D1503MD73M', u'taxon_id': 9606, u'species': u'homo_sapiens'}, u'method_link_type': u'ENSEMBL_ORTHOLOGUES', u'dn_ds': 0.22114, u'type': u'ortholog_one2one'}\n", "Boreoeutheria\n", "dasypus_novemcinctus\n", "choloepus_hoffmanni\n", "echinops_telfairi\n", "loxodonta_africana\n", "procavia_capensis\n", "tupaia_belangeri\n", "sarcophilus_harrisii\n", "macropus_eugenii\n", "monodelphis_domestica\n", "ornithorhynchus_anatinus\n", "ornithorhynchus_anatinus\n", "petromyzon_marinus\n", "meleagris_gallopavo\n", "anolis_carolinensis\n", "ficedula_albicollis\n", "pelodiscus_sinensis\n", "gallus_gallus\n", "taeniopygia_guttata\n", "anas_platyrhynchos\n", "latimeria_chalumnae\n", "astyanax_mexicanus\n", "oryzias_latipes\n", "takifugu_rubripes\n", "gasterosteus_aculeatus\n", "tetraodon_nigroviridis\n", "poecilia_formosa\n", "poecilia_formosa\n", "danio_rerio\n", "gadus_morhua\n", "xiphophorus_maculatus\n", "xiphophorus_maculatus\n", "oreochromis_niloticus\n", "danio_rerio\n", "lepisosteus_oculatus\n", "gasterosteus_aculeatus\n", "takifugu_rubripes\n", "tetraodon_nigroviridis\n", "takifugu_rubripes\n", "xenopus_tropicalis\n", "xenopus_tropicalis\n", "ciona_intestinalis\n", "ciona_savignyi\n", "ciona_savignyi\n", "drosophila_melanogaster\n", "caenorhabditis_elegans\n", "caenorhabditis_elegans\n" ] } ], "source": [ "hom_response = do_request(ensembl_server, 'homology/id', lct_id, type='orthologues', sequence='none')\n", "#print(hom_response['data'][0]['homologies'])\n", "homologies = hom_response['data'][0]['homologies']\n", "for homology in homologies:\n", " print(homology['target']['species'])\n", " if homology['target']['species'] != 'equus_caballus':\n", " continue\n", " print(homology)\n", " print(homology['taxonomy_level'])\n", " horse_id = homology['target']['id']" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{u'assembly_name': u'EquCab2', u'display_name': u'LCT', u'description': u'lactase [Source:HGNC Symbol;Acc:HGNC:6530]', u'seq_region_name': u'18', u'logic_name': u'ensembl', u'object_type': u'Gene', u'start': 19610968, u'id': u'ENSECAG00000018594', u'source': u'ensembl', u'db_type': u'core', u'biotype': u'protein_coding', u'end': 19657160, u'species': u'equus_caballus', u'strand': -1}\n" ] } ], "source": [ "horse_req = do_request(ensembl_server, 'lookup/id', horse_id)\n", "print(horse_req)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#maybe synteny of MCM6 and LCT with caballus and gorilla" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }