{ "metadata": { "name": "", "signature": "sha256:f6bec30484ca3e21977bbf941c317ae3b49c0d0b92ec1d98618613fb6a0f1424" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Global Imports" ] }, { "cell_type": "code", "collapsed": false, "input": [ "%pylab inline" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] } ], "prompt_number": 4 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "External Package Imports" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import os as os\n", "import pickle as pickle\n", "import pandas as pd" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "print 'changing to source dirctory'" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "changing to source dirctory\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "os.chdir('../src')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Module Imports" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from Stats.Scipy import *\n", "from Stats.Survival import *\n", "\n", "from Processing.Helpers import *\n", "\n", "from Figures.Helpers import *\n", "from Figures.Pandas import *\n", "from Figures.Boxplots import *\n", "from Figures.Survival import draw_survival_curve, survival_and_stats\n", "from Figures.Survival import draw_survival_curves\n", "from Figures.Survival import survival_stat_plot" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "import Data.Firehose as FH" ], "language": "python", 
"metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Tweaking Display Parameters" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load default custom.css file from ipython profile" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from IPython import utils \n", "from IPython.display import HTML " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "css_file = 'profile_default/static/custom/custom.css'\n", "base = utils.path.get_ipython_dir()\n", "styles = \"\" % (open(os.path.join(base, css_file),'r').read())\n", "display(HTML(styles))" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "" ], "metadata": {}, "output_type": "pyout", "prompt_number": 30, "text": [ "" ] } ], "prompt_number": 30 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Pandas display parameters" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.set_option('precision', 3)\n", "pd.set_option('display.width', 300)\n", "plt.rcParams['font.size'] = 12" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Tweaking color scheme" ] }, { "cell_type": "code", "collapsed": false, "input": [ "'''Color schemes for paper taken from http://colorbrewer2.org/'''\n", "colors = plt.rcParams['axes.color_cycle']\n", "colors_st = ['#CA0020', '#F4A582', '#92C5DE', '#0571B0']\n", "colors_th = ['#E66101', '#FDB863', '#B2ABD2', '#5E3C99']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Function to Pull a Firehose Run Container" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def get_run(firehose_dir, version='Latest'):\n", " '''\n", " Helper to get a run from the file-system. 
\n", " '''\n", " path = '{}/ucsd_analyses'.format(firehose_dir)\n", " if version is 'Latest':\n", " version = sorted(os.listdir(path))[-1]\n", " run = pickle.load(open('{}/{}/RunObject.p'.format(path, version), 'rb'))\n", " return run" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 14 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Global Run Variables" ] }, { "cell_type": "code", "collapsed": false, "input": [ "OUT_PATH = '../Data'\n", "RUN_DATE = '2014_01_15'\n", "VERSION = 'all'\n", "CANCER = 'HNSC'\n", "FIGDIR = '../Figures/'\n", "if not os.path.isdir(FIGDIR):\n", " os.makedirs(FIGDIR)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 15 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Read In Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here we read in the pre-processed data that we downloaded and initialized in the [download_data notebook](download_data.ipynb)." ] }, { "cell_type": "code", "collapsed": false, "input": [ "print 'populating namespace with data'" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "populating namespace with data\n" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "run_path = '{}/Firehose__{}/'.format(OUT_PATH, RUN_DATE)\n", "run = get_run(run_path, 'Run_' + VERSION)\n", "cancer = run.load_cancer(CANCER)\n", "clinical = cancer.load_clinical()\n", "\n", "mut = cancer.load_data('Mutation')\n", "mut.uncompress()\n", "cn = cancer.load_data('CN_broad')\n", "cn.uncompress()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 17 }, { "cell_type": "markdown", "metadata": {}, "source": [ "The expression objects we use are the updated ones using only HPV- patients. 
We do this because the expression uses some global variables as filters, so we need to reform the feature set after we limit the cohort to HPV- patients." ] }, { "cell_type": "code", "collapsed": false, "input": [ "rna = pickle.load(open(cancer.path + '/mRNASeq/store/no_hpv2.p', 'rb'))\n", "mirna = pickle.load(open(cancer.path + '/miRNASeq/store/no_hpv2.p', 'rb'))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 18 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Process Some Clinical Variables into Namespace" ] }, { "cell_type": "code", "collapsed": false, "input": [ "hpv = clinical.hpv\n", "surv = clinical.survival.survival_5y\n", "age = clinical.clinical.age.astype(float)\n", "old = pd.Series(1.*(age>=75), name='old')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 19 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Form Working Set of Patients for Discovery Cohort" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pats = [true_index(hpv == 0), mut.features.columns, cn.features.columns,\n", " surv.unstack().index, rna.features.columns, mirna.features.columns,\n", " true_index(age < 85)]\n", "keepers_o = reduce(set.intersection, map(set, pats))\n", "keepers_o = pd.Index(keepers_o)\n", "len(keepers_o)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 20, "text": [ "250" ] } ], "prompt_number": 20 } ], "metadata": {} } ] }