# Calculate CCLE Tissue Heatmaps -- set-up, data loading, and tissue discovery.

# Import only the two names this notebook uses instead of `import *`, so it is
# obvious where `Network` and `clustergrammer_widget` come from and nothing else
# is pulled into the notebook namespace.
from clustergrammer_widget import Network, clustergrammer_widget

# Single Network object reused by every later cell to load, filter, normalize,
# cluster, and export the data.
net = Network(clustergrammer_widget)

# Load the CCLE matrix from disk and export it as a DataFrame. Later cells
# reload `ccle` into `net` before filtering for each tissue, so `ccle` itself is
# never modified. The recorded run printed a shape of (18874, 1037).
net.load_file('../original_data/CCLE.txt')
ccle = net.export_df()
print(ccle.shape)

# Collect the unique tissue categories, sorted alphabetically. Element 1 of each
# column label is used as the tissue category; in the recorded outputs these
# values look like 'tissue: lung'.
# NOTE(review): assumes every column label is a tuple/sequence with at least
# two elements -- confirm against the CCLE.txt column format.
tissues = sorted({inst_col[1] for inst_col in ccle.columns.tolist()})
# ---------------------------------------------------------------------------
# Intra-Normalized Tissue-Specific Heatmaps
# For each tissue: filter to its cell lines, keep the most variable genes,
# z-score each gene across that tissue's cell lines, enrich, cluster, save JSON.
# ---------------------------------------------------------------------------
keep_tissues = []
for inst_tissue in tissues:
    # start from the full, unmodified CCLE matrix for every tissue
    net.load_df(ccle)
    net.filter_cat('col', 1, inst_tissue)
    num_cols = net.dat['mat'].shape[1]

    # only keep tissues that have more than one cell line
    if num_cols > 1:
        print(inst_tissue + ': ' + str(num_cols))

        # remember this tissue so the inter-normalized pass uses the same set
        keep_tissues.append(inst_tissue)

        # filter for top 250 genes in tissue based on variance
        net.filter_N_top('row', 250, 'var')

        # normalize gene expression across cell lines in tissue
        net.normalize(axis='row', norm_type='zscore')

        # pre-calculate enrichment analysis for Gene Ontology Biological Process
        net.enrichrgram('GO_Biological_Process_2015')

        # cluster and tell front-end to enable enrichrgram
        # (views=[] : do not calculate row-filtered views)
        net.cluster(views=[], enrichrgram=True)

        # save to JSON; the category value looks like 'tissue: lung', so the
        # part after ': ' is the bare tissue name used in the file name
        filename = '../json/intra-norm_' + inst_tissue.split(': ')[1] + '.json'
        net.write_json_to_file('viz', filename, indent='no-indent')

# ---------------------------------------------------------------------------
# Inter-Normalized Tissue-Specific Heatmaps
# Tissue-specific heatmaps built from the most consistently differentially
# expressed genes in each tissue relative to all cell lines in the CCLE.
# ---------------------------------------------------------------------------

# make inter-tissue normalized ccle DataFrame (z-score each gene across ALL
# cell lines, before any tissue filtering)
net.load_df(ccle)
net.normalize(axis='row', norm_type='zscore')
ccle_zscore = net.export_df()

for inst_tissue in keep_tissues:
    # load inter-tissue normalized data
    net.load_df(ccle_zscore)

    # filter for tissue of interest
    net.filter_cat('col', 1, inst_tissue)

    # Recompute the cell-line count for THIS tissue. Previously the value left
    # over from the last iteration of the intra-normalized loop was printed, so
    # every tissue reported the same (wrong) number.
    num_cols = net.dat['mat'].shape[1]
    print(inst_tissue + ': ' + str(num_cols))

    # keep the top 250 differentially expressed genes, ranked with the 'sum'
    # option over this tissue's z-scored columns
    # NOTE(review): presumably ranks by magnitude of the row sum -- confirm
    # against the clustergrammer filter_N_top documentation.
    net.filter_N_top('row', 250, 'sum')

    # pre-calculate enrichment analysis for Gene Ontology Biological Process
    net.enrichrgram('GO_Biological_Process_2015')

    # cluster and tell front-end to enable enrichrgram
    net.cluster(enrichrgram=True)

    # save to JSON (same naming scheme as the intra-normalized files)
    filename = '../json/inter-norm_' + inst_tissue.split(': ')[1] + '.json'
    net.write_json_to_file('viz', filename, indent='no-indent')
"Python [Root]" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 2 }