{
 "metadata": {
  "name": "",
  "signature": "sha256:4c7b33f538348ca89db1734b651d1e25d6a5722d533440483c9a5d6db3b0fd0e"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# All imports live in this first cell so a fresh kernel can run top to bottom.\n",
      "import json\n",
      "from contextlib import closing\n",
      "\n",
      "try:\n",
      "    from urllib.request import urlopen  # Python 3\n",
      "except ImportError:\n",
      "    from urllib2 import urlopen  # Python 2 fallback\n",
      "\n",
      "import matplotlib\n",
      "import matplotlib.pyplot as plt\n",
      "import numpy as np\n",
      "import pandas as pd\n",
      "from lxml import etree\n",
      "\n",
      "%matplotlib inline\n",
      "matplotlib.rcParams['savefig.dpi'] = 300\n",
      "#%config InlineBackend.figure_format = 'svg'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Base URL of the API plus two query templates that are filled in with str.format().\n",
      "api_url = \"http://api.brain-map.org/api/v2/data/\"\n",
      "# JSON query for expression data; takes {probes} and {donor}.\n",
      "query_expression = \"query.json?criteria=service::human_microarray_expression[probes$eq{probes}][donor$eq{donor}]\"\n",
      "# XML query returning probe ids; takes {geneid}.\n",
      "query_probes = \"query.xml?criteria=model::Probe,rma::criteria,[probe_type$eq'DNA'],products[abbreviation$eq'HumanMA'],gene[acronym$in{geneid}],rma::options[only$eq'probes.id']\"\n",
      "\n",
      "# The inner single quotes are part of the values: they are substituted verbatim into the queries.\n",
      "donor = \"'H035.2001'\"\n",
      "gene_ids = \"'SLC6A2','SCN1A'\""
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Step 1: look up the probe ids for the requested genes (XML response).\n",
      "request_url = api_url + query_probes.format(geneid=gene_ids)\n",
      "with closing(urlopen(request_url)) as response:\n",
      "    xml_data = response.read()\n",
      "tree = etree.fromstring(xml_data)\n",
      "\n",
      "probe_ids = [t.text for t in tree.xpath('//probe/id')]\n",
      "if not probe_ids:\n",
      "    # Fail here with a clear message instead of sending a query with an empty probe list.\n",
      "    raise ValueError('No probes returned for genes: ' + gene_ids)\n",
      "probes = ','.join(probe_ids)\n",
      "\n",
      "# Step 2: download the expression data for those probes and the chosen donor (JSON response).\n",
      "request_url = api_url + query_expression.format(probes=probes, donor=donor)\n",
      "with closing(urlopen(request_url)) as response:\n",
      "    # Decode explicitly so parsing works whether read() returns bytes or str.\n",
      "    probe_data = json.loads(response.read().decode('utf-8'))['msg']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# One list of float expression levels per probe, keyed by probe name.\n",
      "# A list comprehension (not map) guarantees real lists on Python 2 and 3.\n",
      "expr_lvls = {prb['name']: [float(lvl) for lvl in prb['expression_level']]\n",
      "             for prb in probe_data['probes']}\n",
      "# Per-sample annotations taken from the 'samples' entries of the response.\n",
      "structures = [s['top_level_structure']['abbreviation']\n",
      "              for s in probe_data['samples']]\n",
      "structure_id = [s['structure']['id']\n",
      "                for s in probe_data['samples']]\n",
      "# Probe name -> gene symbol lookup.\n",
      "genes = {prb['name']: prb['gene-symbol']\n",
      "         for prb in probe_data['probes']}\n",
      "# Add the annotation columns next to the probe columns before building the frame.\n",
      "expr_lvls.update({'top_level_structure': structures,\n",
      "                  'structure_id': structure_id})\n",
      "\n",
      "df = pd.DataFrame(expr_lvls)\n",
      "df.to_csv('../data/allen_brain_atlas.csv', index=False)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Read the table back from the CSV written above and preview the first 10 rows.\n",
      "df = pd.read_csv('../data/allen_brain_atlas.csv')\n",
      "df.head(10)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [ { "html": [ "
\n", " | A_23_P28224 | \n", "A_23_P358345 | \n", "CUST_16472_PI416261804 | \n", "CUST_17139_PI416261804 | \n", "CUST_546_PI416408490 | \n", "structure_id | \n", "top_level_structure | \n", "
---|---|---|---|---|---|---|---|
0 | \n", "8.8037 | \n", "1.6248 | \n", "2.3772 | \n", "5.9888 | \n", "6.8569 | \n", "4055 | \n", "FL | \n", "
1 | \n", "8.9927 | \n", "1.5055 | \n", "1.7143 | \n", "5.8315 | \n", "6.5077 | \n", "4079 | \n", "FL | \n", "
2 | \n", "8.6063 | \n", "1.5778 | \n", "2.8595 | \n", "6.0884 | \n", "6.5789 | \n", "4079 | \n", "FL | \n", "
3 | \n", "8.5581 | \n", "2.0678 | \n", "1.9892 | \n", "6.3405 | \n", "6.6196 | \n", "4079 | \n", "FL | \n", "
4 | \n", "8.7339 | \n", "1.4767 | \n", "1.4731 | \n", "5.8716 | \n", "6.4472 | \n", "4080 | \n", "FL | \n", "
5 | \n", "8.4503 | \n", "1.8223 | \n", "2.2234 | \n", "5.7434 | \n", "6.4456 | \n", "4080 | \n", "FL | \n", "
6 | \n", "8.7355 | \n", "1.5104 | \n", "1.6819 | \n", "5.9797 | \n", "6.8184 | \n", "4890 | \n", "FL | \n", "
7 | \n", "8.9026 | \n", "1.5253 | \n", "1.7685 | \n", "6.2411 | \n", "6.6980 | \n", "4890 | \n", "FL | \n", "
8 | \n", "8.7765 | \n", "2.9994 | \n", "1.7939 | \n", "6.2621 | \n", "6.8061 | \n", "4048 | \n", "FL | \n", "
9 | \n", "8.9004 | \n", "1.7628 | \n", "1.7252 | \n", "6.3940 | \n", "6.9415 | \n", "4048 | \n", "FL | \n", "