{ "metadata": { "name": "", "signature": "sha256:da677df9cac894af51ace4b828a5ff3589f610081892779058f2f5328a34262e" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Global Imports" ] }, { "cell_type": "code", "collapsed": false, "input": [ "%pylab inline" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] } ], "prompt_number": 1 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "External Package Imports" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import os as os\n", "import pickle as pickle\n", "import pandas as pd" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "print 'changing to source dirctory'" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "changing to source dirctory\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "os.chdir('../src')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Module Imports" ] }, { "cell_type": "code", "collapsed": false, "input": [ "from Stats.Scipy import *\n", "from Stats.Survival import *\n", "\n", "from Processing.Helpers import *\n", "\n", "from Figures.Helpers import *\n", "from Figures.Pandas import *\n", "from Figures.Boxplots import *\n", "from Figures.Survival import draw_survival_curve, survival_and_stats\n", "from Figures.Survival import draw_survival_curves\n", "from Figures.Survival import survival_stat_plot" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "import Data.Firehose as FH" ], "language": "python", 
"metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Tweaking Display Parameters" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pd.set_option('precision', 3)\n", "pd.set_option('display.line_width', 100)\n", "pd.set_option('display.width', 300)\n", "plt.rcParams['font.size'] = 12" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "'''Color schemes for paper taken from http://colorbrewer2.org/'''\n", "colors = plt.rcParams['axes.color_cycle']\n", "colors_st = ['#CA0020', '#F4A582', '#92C5DE', '#0571B0']\n", "colors_th = ['#E66101', '#FDB863', '#B2ABD2', '#5E3C99']" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Function to Pull a Firehose Run Container" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def get_run(firehose_dir, version='Latest'):\n", " '''\n", " Helper to get a run from the file-system. \n", " '''\n", " path = '{}/ucsd_analyses'.format(firehose_dir)\n", " if version is 'Latest':\n", " version = sorted(os.listdir(path))[-1]\n", " run = pickle.load(open('{}/{}/RunObject.p'.format(path, version), 'rb'))\n", " return run" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Read In Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here we read in the pre-processed data that we downloaded and initialized in the [download_data notebook](download_data.ipynb)." 
] }, { "cell_type": "code", "collapsed": false, "input": [ "print 'populating namespace with data'" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "populating namespace with data\n" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "params = pd.read_table('../global_params.txt', header=None, squeeze=True, \n", " index_col=0)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "run_path = '{}/Firehose__{}/'.format(params.ix['OUT_PATH'], params.ix['RUN_DATE'])\n", "run = get_run(run_path, 'Run_' + params.ix['VERSION'])\n", "cancer = run.load_cancer(params.ix['CANCER'])\n", "clinical = cancer.load_clinical()\n", "\n", "mut = cancer.load_data('Mutation')\n", "mut.uncompress()\n", "cn = cancer.load_data('CN_broad')\n", "cn.uncompress()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 }, { "cell_type": "markdown", "metadata": {}, "source": [ "The expression objects we use are the updated ones using only HPV- patients. We do this because the expression uses some global variables as filters, so we need to reform the feature set after we limit the cohort to HPV- patients."
] }, { "cell_type": "code", "collapsed": false, "input": [ "rna = pickle.load(open(cancer.path + '/mRNASeq/store/no_hpv2.p', 'rb'))\n", "mirna = pickle.load(open(cancer.path + '/miRNASeq/store/no_hpv2.p', 'rb'))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Process Some Clinical Variables into Namespace" ] }, { "cell_type": "code", "collapsed": false, "input": [ "hpv = clinical.hpv\n", "surv = clinical.survival.survival_5y\n", "age = clinical.clinical.age.astype(float)\n", "old = pd.Series(1.*(age>=75), name='old')" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 14 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Form Working Set of Patients for Discovery Cohort" ] }, { "cell_type": "code", "collapsed": false, "input": [ "pats = [true_index(hpv == 0), mut.features.columns, cn.features.columns,\n", " surv.unstack().index, rna.features.columns, mirna.features.columns,\n", " true_index(age < 85)]\n", "keepers_o = reduce(set.intersection, map(set, pats))\n", "keepers_o = pd.Index(keepers_o)\n", "len(keepers_o)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 16, "text": [ "250" ] } ], "prompt_number": 16 } ], "metadata": {} } ] }