{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Calculate CCLE Tissue Heatmaps\n",
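    "This notebook calculates heatmaps for each tissue in the Cancer Cell Line Encyclopedia (CCLE). Two heatmaps are made per tissue: one intra-normalized and one inter-normalized."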
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Import only the two names this notebook uses instead of a wildcard import,\n",
    "# so it is clear where each name comes from.\n",
    "from clustergrammer_widget import Network, clustergrammer_widget\n",
    "\n",
    "# Network object reused by every cell below to load, filter, normalize,\n",
    "# cluster, and export data\n",
    "net = Network(clustergrammer_widget)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load CCLE data\n",
    "The CCLE data is loaded and exported as a pandas DataFrame, which is then used to generate the tissue-specific heatmaps."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(18874, 1037)\n"
     ]
    }
   ],
   "source": [
    "# location of the CCLE data file, relative to this notebook\n",
    "ccle_path = '../original_data/CCLE.txt'\n",
    "\n",
    "# read the file into the shared Network object, then pull it back out as a DataFrame\n",
    "net.load_file(ccle_path)\n",
    "ccle = net.export_df()\n",
    "\n",
    "# report the DataFrame dimensions as a quick sanity check\n",
    "print(ccle.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Get Unique Tissues"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# element 1 of each column label holds the tissue category (e.g. 'tissue: lung')\n",
    "cols = ccle.columns.tolist()\n",
    "\n",
    "# de-duplicate with a set, then sort so tissues are processed in a stable order\n",
    "tissues = sorted({col_label[1] for col_label in cols})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
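    "# Intra-Normalized Tissue-Specific Heatmaps\n",
    "Here, we make tissue-specific heatmaps using the 250 genes with the highest variance within each tissue, Z-scored across that tissue's cell lines only."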
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tissue: autonomic_ganglia: 17\n",
      "tissue: biliary_tract: 8\n",
      "tissue: bone: 29\n",
      "tissue: breast: 59\n",
      "tissue: central_nervous_system: 69\n",
      "tissue: endometrium: 27\n",
      "tissue: haematopoietic_and_lymphoid_tissue: 180\n",
      "tissue: kidney: 36\n",
      "tissue: large_intestine: 61\n",
      "tissue: liver: 28\n",
      "tissue: lung: 187\n",
      "tissue: oesophagus: 26\n",
      "tissue: ovary: 52\n",
      "tissue: pancreas: 44\n",
      "tissue: pleura: 11\n",
      "tissue: prostate: 8\n",
      "tissue: salivary_gland: 2\n",
      "tissue: skin: 62\n",
      "tissue: soft_tissue: 21\n",
      "tissue: stomach: 38\n",
      "tissue: thyroid: 12\n",
      "tissue: upper_aerodigestive_tract: 32\n",
      "tissue: urinary_tract: 27\n"
     ]
    }
   ],
   "source": [
    "# Intra-tissue normalization: for every tissue, filter, normalize, enrich,\n",
    "# cluster, and write the visualization JSON.\n",
    "keep_tissues = []\n",
    "for inst_tissue in tissues:\n",
    "    # start from the full CCLE matrix and keep only this tissue's cell lines\n",
    "    net.load_df(ccle)\n",
    "    net.filter_cat('col', 1, inst_tissue)\n",
    "    num_cols = net.dat['mat'].shape[1]\n",
    "\n",
    "    # skip tissues that do not have more than one cell line\n",
    "    if num_cols <= 1:\n",
    "        continue\n",
    "\n",
    "    print(inst_tissue + ': ' + str(num_cols))\n",
    "\n",
    "    # remember this tissue; keep_tissues drives the inter-normalized loop later on\n",
    "    keep_tissues.append(inst_tissue)\n",
    "\n",
    "    # keep the 250 genes with the highest variance within the tissue\n",
    "    net.filter_N_top('row', 250, 'var')\n",
    "\n",
    "    # Z-score each gene across the tissue's cell lines\n",
    "    net.normalize(axis='row', norm_type='zscore')\n",
    "\n",
    "    # pre-calculate enrichment analysis for Gene Ontology Biological Process\n",
    "    net.enrichrgram('GO_Biological_Process_2015')\n",
    "\n",
    "    # cluster without row-filtered views and tell the front-end to enable enrichrgram\n",
    "    net.cluster(views=[], enrichrgram=True)\n",
    "\n",
    "    # labels look like 'tissue: lung'; the part after ': ' names the output file\n",
    "    tissue_name = inst_tissue.split(': ')[1]\n",
    "    out_path = '../json/intra-norm_' + tissue_name + '.json'\n",
    "    net.write_json_to_file('viz', out_path, indent='no-indent')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Inter-Normalized Tissue-Specific Heatmaps\n",
    "Here, we make tissue-specific heatmaps using the genes that are most consistently differentially expressed within each tissue relative to all cell lines in the CCLE."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tissue: autonomic_ganglia: 27\n",
      "tissue: biliary_tract: 27\n",
      "tissue: bone: 27\n",
      "tissue: breast: 27\n",
      "tissue: central_nervous_system: 27\n",
      "tissue: endometrium: 27\n",
      "tissue: haematopoietic_and_lymphoid_tissue: 27\n",
      "tissue: kidney: 27\n",
      "tissue: large_intestine: 27\n",
      "tissue: liver: 27\n",
      "tissue: lung: 27\n",
      "tissue: oesophagus: 27\n",
      "tissue: ovary: 27\n",
      "tissue: pancreas: 27\n",
      "tissue: pleura: 27\n",
      "tissue: prostate: 27\n",
      "tissue: salivary_gland: 27\n",
      "tissue: skin: 27\n",
      "tissue: soft_tissue: 27\n",
      "tissue: stomach: 27\n",
      "tissue: thyroid: 27\n",
      "tissue: upper_aerodigestive_tract: 27\n",
      "tissue: urinary_tract: 27\n"
     ]
    }
   ],
   "source": [
    "# make inter-tissue normalized ccle DataFrame: Z-score each gene across all cell lines\n",
    "net.load_df(ccle)\n",
    "net.normalize(axis='row', norm_type='zscore')\n",
    "ccle_zscore = net.export_df()\n",
    "\n",
    "for inst_tissue in keep_tissues:\n",
    "    # load inter-tissue normalized data\n",
    "    net.load_df(ccle_zscore)\n",
    "\n",
    "    # filter for tissue of interest\n",
    "    net.filter_cat('col', 1, inst_tissue)\n",
    "\n",
    "    # count this tissue's cell lines from the filtered matrix, so the number\n",
    "    # printed belongs to this tissue and not to a value left over from an earlier cell\n",
    "    num_cols = net.dat['mat'].shape[1]\n",
    "    print(inst_tissue + ': ' + str(num_cols))\n",
    "\n",
    "    # keep the top 250 genes, ranked with the 'sum' rank type over the tissue's Z-scores\n",
    "    net.filter_N_top('row', 250, 'sum')\n",
    "\n",
    "    # pre-calculate enrichment analysis for Gene Ontology Biological Process\n",
    "    net.enrichrgram('GO_Biological_Process_2015')\n",
    "\n",
    "    # cluster and tell front-end to enable enrichrgram\n",
    "    net.cluster(enrichrgram=True)\n",
    "\n",
    "    # save to JSON; labels look like 'tissue: lung', so the part after ': ' names the file\n",
    "    filename = '../json/inter-norm_' + inst_tissue.split(': ')[1] + '.json'\n",
    "    net.write_json_to_file('viz', filename, indent='no-indent')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}