{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compute perturbagen signatures from gold signature consensi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import bz2\n",
    "import json\n",
    "\n",
    "import requests\n",
    "import pandas\n",
    "import sqlite3\n",
    "\n",
    "import l1000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Entrez Gene to Symbol mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Read symbol to entrez_gene_id mapping\n",
    "url = 'https://github.com/dhimmel/entrez-gene/raw/a7362748a34211e5df6f2d185bb3246279760546/data/symbol-map.json'\n",
    "symbol_map = json.loads(requests.get(url).text)\n",
    "\n",
    "# Read dataframe of entrez gene records\n",
    "url = 'https://github.com/dhimmel/entrez-gene/raw/a7362748a34211e5df6f2d185bb3246279760546/data/genes-human.tsv'\n",
    "entrez_gene_df = pandas.read_table(url)\n",
    "renamer = {\n",
    "    'GeneID': 'entrez_gene_id',\n",
    "    'Symbol': 'symbol',\n",
    "    'type_of_gene': 'type_of_gene',\n",
    "    'description': 'description',\n",
    "}\n",
    "entrez_gene_df = entrez_gene_df.rename(columns=renamer)[list(renamer.values())]\n",
    "entrez_gene_df.entrez_gene_id = entrez_gene_df.entrez_gene_id.astype(str)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read probe information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "('landmark genes', 978)\n",
      "('best inferred gene set size', 10638)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pr_id</th>\n",
       "      <th>pr_gene_id</th>\n",
       "      <th>pr_gene_symbol</th>\n",
       "      <th>pr_gene_title</th>\n",
       "      <th>is_lm</th>\n",
       "      <th>is_l1000</th>\n",
       "      <th>is_bing</th>\n",
       "      <th>pr_pool_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1831</th>\n",
       "      <td>218075_at</td>\n",
       "      <td>8086</td>\n",
       "      <td>AAAS</td>\n",
       "      <td>achalasia, adrenocortical insufficiency, alacr...</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>inferred</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1833</th>\n",
       "      <td>218434_s_at</td>\n",
       "      <td>65985</td>\n",
       "      <td>AACS</td>\n",
       "      <td>acetoacetyl-CoA synthetase</td>\n",
       "      <td>False</td>\n",
       "      <td>True</td>\n",
       "      <td>True</td>\n",
       "      <td>inferred</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            pr_id pr_gene_id pr_gene_symbol  \\\n",
       "1831    218075_at       8086           AAAS   \n",
       "1833  218434_s_at      65985           AACS   \n",
       "\n",
       "                                          pr_gene_title  is_lm is_l1000  \\\n",
       "1831  achalasia, adrenocortical insufficiency, alacr...  False     True   \n",
       "1833                         acetoacetyl-CoA synthetase  False     True   \n",
       "\n",
       "     is_bing pr_pool_id  \n",
       "1831    True   inferred  \n",
       "1833    True   inferred  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# construct probe_df\n",
    "probe_df = pandas.read_table('data/geneinfo/geneinfo.tsv.gz')\n",
    "probe_to_gene = dict(zip(probe_df.pr_id, probe_df.pr_gene_id))\n",
    "\n",
    "# Landmark probes\n",
    "landmark_probe_df = probe_df[probe_df.pr_pool_id.str.startswith('epsilon').fillna(False)]\n",
    "print('landmark genes', len(landmark_probe_df))\n",
    "\n",
    "# BIGS + Landmark probes\n",
    "probe_df = probe_df[probe_df.is_bing.fillna(False)]\n",
    "# If a gene is an epsilon landmark, do not include imputed probes\n",
    "probe_df.query(\"pr_gene_id not in @landmark_probe_df.pr_gene_id or pr_id in @landmark_probe_df.pr_id\")\n",
    "print('best inferred gene set size', len(probe_df))\n",
    "\n",
    "probe_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>entrez_gene_id</th>\n",
       "      <th>status</th>\n",
       "      <th>symbol</th>\n",
       "      <th>type_of_gene</th>\n",
       "      <th>description</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>100</td>\n",
       "      <td>imputed</td>\n",
       "      <td>ADA</td>\n",
       "      <td>protein-coding</td>\n",
       "      <td>adenosine deaminase</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1000</td>\n",
       "      <td>imputed</td>\n",
       "      <td>CDH2</td>\n",
       "      <td>protein-coding</td>\n",
       "      <td>cadherin 2, type 1, N-cadherin (neuronal)</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  entrez_gene_id   status symbol    type_of_gene  \\\n",
       "0            100  imputed    ADA  protein-coding   \n",
       "1           1000  imputed   CDH2  protein-coding   \n",
       "\n",
       "                                 description  \n",
       "0                        adenosine deaminase  \n",
       "1  cadherin 2, type 1, N-cadherin (neuronal)  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create a dataset of the genes represented by probes\n",
    "gene_df = probe_df.groupby('pr_gene_id').apply(\n",
    "    lambda df: pandas.Series({'status': 'measured' if df.pr_pool_id.iloc[0].startswith('epsilon') else 'imputed'})\n",
    ")\n",
    "gene_df.index.name = 'entrez_gene_id'\n",
    "gene_df = gene_df.reset_index()\n",
    "gene_df = gene_df.merge(entrez_gene_df).sort_values('entrez_gene_id')\n",
    "gene_df.to_csv('data/consensi/genes.tsv', index=False, sep='\\t')\n",
    "gene_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "imputed     6489\n",
       "measured     978\n",
       "Name: status, dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of genes after converting from probe to gene\n",
    "gene_df.status.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "inferred          9490\n",
       "epsilon|deltap     786\n",
       "epsilon            192\n",
       "deltap             165\n",
       "Name: pr_pool_id, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Filter BING probes for valid entrez gene IDs\n",
    "probe_df = probe_df.query(\"pr_gene_id in @gene_df.entrez_gene_id\")\n",
    "\n",
    "# Probes in BING\n",
    "probe_df.pr_pool_id.value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Preparations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def run_consensi(sig_expr_df, pert_to_sigs, name):\n",
    "    \"\"\"Compute consensi signatures\"\"\"\n",
    "    pert_expr_df = l1000.get_consensus_signatures(sig_expr_df, pert_to_sigs, weighting_subset=landmark_probe_df.pr_id)\n",
    "    pert_expr_df = l1000.probes_to_genes(pert_expr_df, probe_to_gene)\n",
    "    pert_expr_df = pert_expr_df.transpose()\n",
    "    pert_expr_df.index.name = 'perturbagen'\n",
    "    print(pert_expr_df.shape)\n",
    "    path = 'data/consensi/consensi-{}.tsv.bz2'.format(name)\n",
    "    with bz2.BZ2File(path, 'w') as write_file:\n",
    "        pert_expr_df.reset_index().to_csv(write_file, sep='\\t', index=False, float_format='%.3f')\n",
    "    return pert_expr_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# open database connection\n",
    "connection = sqlite3.connect('data/l1000.db')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                                                                                CPU times: user 56min 42s, sys: 9min 9s, total: 1h 5min 52s\n",
      "Wall time: 1h 6min 47s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# get all gold signatures\n",
    "query = \"SELECT sigs.sig_id FROM sigs WHERE sigs.is_gold = 1\"\n",
    "sigs = pandas.read_sql(query, connection).sig_id.tolist()\n",
    "\n",
    "# get probes and extract signatures\n",
    "probes = probe_df.pr_id.tolist()\n",
    "path = 'download/modzs.gctx'\n",
    "sig_expr_df = l1000.extract_from_gctx(path, probes, sigs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## drugbank consensi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "query = \"\"\"\n",
    "SELECT unichem.resource_id AS drugbank_id, perts.pert_id, sigs.sig_id, sigs.is_gold\n",
    "FROM unichem, perts, sigs\n",
    "WHERE unichem.resource = 'drugbank'\n",
    "AND unichem.pert_uid = perts.pert_uid\n",
    "AND sigs.pert_id = perts.pert_id\n",
    "\"\"\"\n",
    "sig_df = pandas.read_sql(query, connection)\n",
    "sig_df = sig_df.query(\"is_gold == 1\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(1170, 7467)\n",
      "CPU times: user 1h 54min 57s, sys: 1min 16s, total: 1h 56min 14s\n",
      "Wall time: 1h 56min 28s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "pert_to_sigs = {k: g['sig_id'].tolist() for k, g in sig_df.groupby('drugbank_id')}\n",
    "pert_expr_df = run_consensi(sig_expr_df, pert_to_sigs, name='drugbank')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## knockdown consensi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pert_id</th>\n",
       "      <th>pert_iname</th>\n",
       "      <th>pert_type</th>\n",
       "      <th>sig_id</th>\n",
       "      <th>pertubation_entrez_gene_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>TRCN0000139323</td>\n",
       "      <td>PTMS</td>\n",
       "      <td>trt_sh</td>\n",
       "      <td>DER001_HA1E_96H:TRCN0000139323:-666</td>\n",
       "      <td>5763</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>TRCN0000005420</td>\n",
       "      <td>RIOK3</td>\n",
       "      <td>trt_sh</td>\n",
       "      <td>DER001_HA1E_96H:TRCN0000005420:-666</td>\n",
       "      <td>8780</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          pert_id pert_iname pert_type                               sig_id  \\\n",
       "0  TRCN0000139323       PTMS    trt_sh  DER001_HA1E_96H:TRCN0000139323:-666   \n",
       "1  TRCN0000005420      RIOK3    trt_sh  DER001_HA1E_96H:TRCN0000005420:-666   \n",
       "\n",
       "   pertubation_entrez_gene_id  \n",
       "0                        5763  \n",
       "1                        8780  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "SELECT perts.pert_id, perts.pert_iname, perts.pert_type, sigs.sig_id\n",
    "FROM perts, sigs\n",
    "WHERE sigs.pert_id = perts.pert_id\n",
    "AND pert_type = 'trt_sh'\n",
    "AND sigs.is_gold = 1\n",
    "\"\"\"\n",
    "sig_df = pandas.read_sql(query, connection)\n",
    "sig_df['pertubation_entrez_gene_id'] = sig_df.pert_iname.map(symbol_map.get)\n",
    "sig_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4326, 7467)\n",
      "CPU times: user 5h 38min 35s, sys: 1min 5s, total: 5h 39min 40s\n",
      "Wall time: 5h 40min 16s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Condense to perturbagens\n",
    "pert_to_sigs = {int(k): g['sig_id'].tolist() for k, g in sig_df.groupby('pertubation_entrez_gene_id')}\n",
    "pert_expr_df = run_consensi(sig_expr_df, pert_to_sigs, name='knockdown')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## overexpression consensi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pert_id</th>\n",
       "      <th>pert_iname</th>\n",
       "      <th>pert_type</th>\n",
       "      <th>sig_id</th>\n",
       "      <th>pertubation_entrez_gene_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>CMAP-HSF-DSTYK</td>\n",
       "      <td>DSTYK</td>\n",
       "      <td>trt_oe</td>\n",
       "      <td>HSF001_HEK293T_48H:CMAP-HSF-DSTYK:200</td>\n",
       "      <td>25778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>CMAP-HSF-DYRK1B</td>\n",
       "      <td>DYRK1B</td>\n",
       "      <td>trt_oe</td>\n",
       "      <td>HSF001_HEK293T_48H:CMAP-HSF-DYRK1B:200</td>\n",
       "      <td>9149</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           pert_id pert_iname pert_type  \\\n",
       "0   CMAP-HSF-DSTYK      DSTYK    trt_oe   \n",
       "1  CMAP-HSF-DYRK1B     DYRK1B    trt_oe   \n",
       "\n",
       "                                   sig_id  pertubation_entrez_gene_id  \n",
       "0   HSF001_HEK293T_48H:CMAP-HSF-DSTYK:200                       25778  \n",
       "1  HSF001_HEK293T_48H:CMAP-HSF-DYRK1B:200                        9149  "
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "SELECT perts.pert_id, perts.pert_iname, perts.pert_type, sigs.sig_id\n",
    "FROM perts, sigs\n",
    "WHERE sigs.pert_id = perts.pert_id\n",
    "AND pert_type = 'trt_oe'\n",
    "AND sigs.is_gold = 1\n",
    "\"\"\"\n",
    "sig_df = pandas.read_sql(query, connection)\n",
    "sig_df['pertubation_entrez_gene_id'] = sig_df.pert_iname.map(symbol_map.get)\n",
    "sig_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2413, 7467)\n",
      "CPU times: user 2h 25min 22s, sys: 6.9 s, total: 2h 25min 29s\n",
      "Wall time: 2h 25min 42s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Condense to perturbagens\n",
    "pert_to_sigs = {int(k): g['sig_id'].tolist() for k, g in sig_df.groupby('pertubation_entrez_gene_id')}\n",
    "pert_expr_df = run_consensi(sig_expr_df, pert_to_sigs, name='overexpression')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## consensi for all pert_ids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pert_id</th>\n",
       "      <th>pert_iname</th>\n",
       "      <th>pert_type</th>\n",
       "      <th>sig_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>BRD-K07762753</td>\n",
       "      <td>aminopurvalanol-a</td>\n",
       "      <td>trt_cp</td>\n",
       "      <td>CVD001_HUH7_24H:BRD-K07762753-001-03-6:50</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>BRD-A46393198</td>\n",
       "      <td>tetramisole</td>\n",
       "      <td>trt_cp</td>\n",
       "      <td>CPC004_VCAP_6H:BRD-A46393198-003-10-9:10</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         pert_id         pert_iname pert_type  \\\n",
       "0  BRD-K07762753  aminopurvalanol-a    trt_cp   \n",
       "1  BRD-A46393198        tetramisole    trt_cp   \n",
       "\n",
       "                                      sig_id  \n",
       "0  CVD001_HUH7_24H:BRD-K07762753-001-03-6:50  \n",
       "1   CPC004_VCAP_6H:BRD-A46393198-003-10-9:10  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "query = \"\"\"\n",
    "SELECT perts.pert_id, perts.pert_iname, perts.pert_type, sigs.sig_id\n",
    "FROM perts, sigs\n",
    "WHERE sigs.pert_id = perts.pert_id\n",
    "AND sigs.is_gold = 1\n",
    "\"\"\"\n",
    "sig_df = pandas.read_sql(query, connection)\n",
    "sig_df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(38327, 7467)\n",
      "CPU times: user 1d 18h 22min 52s, sys: 6min 7s, total: 1d 18h 29min\n",
      "Wall time: 1d 18h 33min 36s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Condense to perturbagens\n",
    "pert_to_sigs = {k: g['sig_id'].tolist() for k, g in sig_df.groupby('pert_id')}\n",
    "pert_expr_df = run_consensi(sig_expr_df, pert_to_sigs, name='pert_id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>100</th>\n",
       "      <th>1000</th>\n",
       "      <th>10000</th>\n",
       "      <th>10001</th>\n",
       "      <th>10005</th>\n",
       "      <th>10006</th>\n",
       "      <th>10007</th>\n",
       "      <th>1001</th>\n",
       "      <th>10010</th>\n",
       "      <th>100129250</th>\n",
       "      <th>...</th>\n",
       "      <th>998</th>\n",
       "      <th>9984</th>\n",
       "      <th>9986</th>\n",
       "      <th>9987</th>\n",
       "      <th>9988</th>\n",
       "      <th>9989</th>\n",
       "      <th>999</th>\n",
       "      <th>9991</th>\n",
       "      <th>9994</th>\n",
       "      <th>9997</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>perturbagen</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>56582</th>\n",
       "      <td>-0.433639</td>\n",
       "      <td>0.797953</td>\n",
       "      <td>-0.089733</td>\n",
       "      <td>-0.627366</td>\n",
       "      <td>0.132508</td>\n",
       "      <td>-0.114487</td>\n",
       "      <td>-1.254363</td>\n",
       "      <td>0.089211</td>\n",
       "      <td>-0.327051</td>\n",
       "      <td>-0.492944</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.128962</td>\n",
       "      <td>-0.018879</td>\n",
       "      <td>-0.026018</td>\n",
       "      <td>-0.300755</td>\n",
       "      <td>0.447996</td>\n",
       "      <td>0.39047</td>\n",
       "      <td>2.441479</td>\n",
       "      <td>-0.325544</td>\n",
       "      <td>0.481472</td>\n",
       "      <td>-0.690715</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5981</th>\n",
       "      <td>0.953652</td>\n",
       "      <td>-0.170241</td>\n",
       "      <td>-1.344758</td>\n",
       "      <td>0.200088</td>\n",
       "      <td>3.032086</td>\n",
       "      <td>1.175347</td>\n",
       "      <td>-0.768220</td>\n",
       "      <td>1.000292</td>\n",
       "      <td>1.883492</td>\n",
       "      <td>0.335095</td>\n",
       "      <td>...</td>\n",
       "      <td>0.045078</td>\n",
       "      <td>-0.595802</td>\n",
       "      <td>-0.201252</td>\n",
       "      <td>-0.149790</td>\n",
       "      <td>-1.290155</td>\n",
       "      <td>1.35908</td>\n",
       "      <td>0.803665</td>\n",
       "      <td>-1.481666</td>\n",
       "      <td>0.970973</td>\n",
       "      <td>0.088374</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2 rows × 7467 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  100      1000     10000     10001     10005     10006  \\\n",
       "perturbagen                                                               \n",
       "56582       -0.433639  0.797953 -0.089733 -0.627366  0.132508 -0.114487   \n",
       "5981         0.953652 -0.170241 -1.344758  0.200088  3.032086  1.175347   \n",
       "\n",
       "                10007      1001     10010  100129250    ...          998  \\\n",
       "perturbagen                                             ...                \n",
       "56582       -1.254363  0.089211 -0.327051  -0.492944    ...    -0.128962   \n",
       "5981        -0.768220  1.000292  1.883492   0.335095    ...     0.045078   \n",
       "\n",
       "                 9984      9986      9987      9988     9989       999  \\\n",
       "perturbagen                                                              \n",
       "56582       -0.018879 -0.026018 -0.300755  0.447996  0.39047  2.441479   \n",
       "5981        -0.595802 -0.201252 -0.149790 -1.290155  1.35908  0.803665   \n",
       "\n",
       "                 9991      9994      9997  \n",
       "perturbagen                                \n",
       "56582       -0.325544  0.481472 -0.690715  \n",
       "5981        -1.481666  0.970973  0.088374  \n",
       "\n",
       "[2 rows x 7467 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pert_expr_df.head(2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}