{
"metadata": {
"name": "",
"signature": "sha256:f6bec30484ca3e21977bbf941c317ae3b49c0d0b92ec1d98618613fb6a0f1424"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Global Imports"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%pylab inline"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"prompt_number": 4
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"External Package Imports"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import os as os\n",
"import pickle as pickle\n",
"import pandas as pd"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 5
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print 'changing to source dirctory'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"changing to source dirctory\n"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"os.chdir('../src')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 7
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Module Imports"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from Stats.Scipy import *\n",
"from Stats.Survival import *\n",
"\n",
"from Processing.Helpers import *\n",
"\n",
"from Figures.Helpers import *\n",
"from Figures.Pandas import *\n",
"from Figures.Boxplots import *\n",
"from Figures.Survival import draw_survival_curve, survival_and_stats\n",
"from Figures.Survival import draw_survival_curves\n",
"from Figures.Survival import survival_stat_plot"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import Data.Firehose as FH"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Tweaking Display Parameters"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load default custom.css file from ipython profile"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"from IPython import utils \n",
"from IPython.display import HTML "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"css_file = 'profile_default/static/custom/custom.css'\n",
"base = utils.path.get_ipython_dir()\n",
"styles = \"\" % (open(os.path.join(base, css_file),'r').read())\n",
"display(HTML(styles))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
""
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 30,
"text": [
""
]
}
],
"prompt_number": 30
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Pandas display parameters"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pd.set_option('precision', 3)\n",
"pd.set_option('display.width', 300)\n",
"plt.rcParams['font.size'] = 12"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Tweaking color scheme"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"'''Color schemes for paper taken from http://colorbrewer2.org/'''\n",
"colors = plt.rcParams['axes.color_cycle']\n",
"colors_st = ['#CA0020', '#F4A582', '#92C5DE', '#0571B0']\n",
"colors_th = ['#E66101', '#FDB863', '#B2ABD2', '#5E3C99']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Function to Pull a Firehose Run Container"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def get_run(firehose_dir, version='Latest'):\n",
" '''\n",
" Helper to get a run from the file-system. \n",
" '''\n",
" path = '{}/ucsd_analyses'.format(firehose_dir)\n",
" if version is 'Latest':\n",
" version = sorted(os.listdir(path))[-1]\n",
" run = pickle.load(open('{}/{}/RunObject.p'.format(path, version), 'rb'))\n",
" return run"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 14
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Global Run Variables"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"OUT_PATH = '../Data'\n",
"RUN_DATE = '2014_01_15'\n",
"VERSION = 'all'\n",
"CANCER = 'HNSC'\n",
"FIGDIR = '../Figures/'\n",
"if not os.path.isdir(FIGDIR):\n",
" os.makedirs(FIGDIR)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Read In Data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here we read in the pre-processed data that we downloaded and initialized in the [download_data notebook](download_data.ipynb)."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"print 'populating namespace with data'"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"populating namespace with data\n"
]
}
],
"prompt_number": 16
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"run_path = '{}/Firehose__{}/'.format(OUT_PATH, RUN_DATE)\n",
"run = get_run(run_path, 'Run_' + VERSION)\n",
"cancer = run.load_cancer(CANCER)\n",
"clinical = cancer.load_clinical()\n",
"\n",
"mut = cancer.load_data('Mutation')\n",
"mut.uncompress()\n",
"cn = cancer.load_data('CN_broad')\n",
"cn.uncompress()"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 17
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The expression objects we use are the updated ones using only HPV- patients. We do this becuase the expression uses some global variables as filters, so we need to reform the feature set after we limit the cohort to HPV- patients."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"rna = pickle.load(open(cancer.path + '/mRNASeq/store/no_hpv2.p', 'rb'))\n",
"mirna = pickle.load(open(cancer.path + '/miRNASeq/store/no_hpv2.p', 'rb'))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 18
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Process Some Clinical Variables into Namespace"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"hpv = clinical.hpv\n",
"surv = clinical.survival.survival_5y\n",
"age = clinical.clinical.age.astype(float)\n",
"old = pd.Series(1.*(age>=75), name='old')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 19
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Form Working Set of Patients for Discovery Cohort"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pats = [true_index(hpv == 0), mut.features.columns, cn.features.columns,\n",
" surv.unstack().index, rna.features.columns, mirna.features.columns,\n",
" true_index(age < 85)]\n",
"keepers_o = reduce(set.intersection, map(set, pats))\n",
"keepers_o = pd.Index(keepers_o)\n",
"len(keepers_o)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 20,
"text": [
"250"
]
}
],
"prompt_number": 20
}
],
"metadata": {}
}
]
}