{ "metadata": { "kernelspec": { "codemirror_mode": { "name": "ipython", "version": 2 }, "display_name": "IPython (Python 2)", "language": "python", "name": "python2" }, "name": "", "signature": "sha256:d3c4615a1f2e8e3b369e0e5417aba12a1e950ac97882cbd7e9ea1de28ab722f3" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "

# Download Data\n", "\n", "Get updated clinical data from the TCGA Data Portal.

" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
\n", "

GLOBAL VARIABLE WARNING

\n", "Here I download updated clinical data from the TCGA Data Portal. \n", "The portal is served over HTTPS, so `curl` has to be given the path \n", "to a CA certificate bundle (`PATH_TO_CACERT`, set in the next cell) for the download to work. \n", "\n", "Download a copy of a generic cacert.pem [here](https://curl.se/ca/cacert.pem) and point `PATH_TO_CACERT` at it.\n", "
" ] },
{ "cell_type": "code", "collapsed": false, "input": [
 "import os\n",
 "\n",
 "# CA bundle passed to curl via --cacert in the download cell.\n",
 "# Set the PATH_TO_CACERT environment variable to override the default below.\n",
 "PATH_TO_CACERT = os.environ.get('PATH_TO_CACERT', '/cellar/users/agross/cacert.pem')"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 },
{ "cell_type": "code", "collapsed": false, "input": [
 "%cd ../src"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "/cellar/users/agross/TCGA_Code/TCGA/src\n" ] } ], "prompt_number": 2 },
{ "cell_type": "code", "collapsed": false, "input": [
 "from Processing.Imports import *\n",
 "from IPython.display import clear_output"
 ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "line_width has been deprecated, use display.width instead (currently both are\n", "identical)\n", "\n" ] } ], "prompt_number": 3 },
{ "cell_type": "code", "collapsed": false, "input": [
 "# Run configuration: OUT_PATH, RUN_DATE and VERSION select the Firehose run loaded below.\n",
 "OUT_PATH = '../Data'\n",
 "RUN_DATE = '2014_01_15'\n",
 "VERSION = 'all'\n",
 "CANCER = 'HNSC'\n",
 "FIGDIR = '../Figures/'\n",
 "# Create the figure directory on first use.\n",
 "if not os.path.isdir(FIGDIR):\n",
 "    os.makedirs(FIGDIR)"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 },
{ "cell_type": "code", "collapsed": false, "input": [
 "run_path = '{}/Firehose__{}/'.format(OUT_PATH, RUN_DATE)\n",
 "run = get_run(run_path, 'Run_' + VERSION)"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 },
{ "cell_type": "code", "collapsed": false, "input": [
 "# Base URL of the per-tumor directories; the download cell appends '<cancer>/bcr/biotab/clin/'.\n",
 "path = 'https://tcga-data.nci.nih.gov/tcgafiles/ftp_auth/distro_ftpusers/anonymous/tumor/'\n",
 "# Local directory that receives one sub-directory per cancer.\n",
 "out = OUT_PATH + '/Followup_R7'\n",
 "\n",
 "if not os.path.isdir(out):\n",
 "    os.makedirs(out)"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 },
{ "cell_type": "code", "collapsed": false, "input": [
 "run.cancers"
 ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 7, "text": [ "array(['ACC', 'BLCA', 'BRCA', 'CESC', 'COAD', 'DLBC', 'ESCA', 'GBM',\n", " 'HNSC', 'KICH', 'KIRC', 'KIRP', 
'LAML', 'LGG', 'LIHC', 'LUAD',\n", " 'LUSC', 'OV', 'PAAD', 'PRAD', 'READ', 'SARC', 'SKCM', 'STAD',\n", " 'THCA', 'UCEC'], dtype=object)" ] } ], "prompt_number": 7 },
{ "cell_type": "code", "collapsed": false, "input": [
 "import subprocess\n",
 "\n",
 "# Download every file listed in each cancer's MANIFEST.txt into out/<cancer>/.\n",
 "# Failures are collected and reported after clear_output(), so they are not erased with the progress text.\n",
 "failed = []  # (cancer, reason) pairs\n",
 "for cancer in run.cancers:\n",
 "    print(cancer)\n",
 "    try:\n",
 "        f = '{}{}/bcr/biotab/clin/'.format(path, cancer.lower())\n",
 "        files = pd.read_table(f + 'MANIFEST.txt', sep=' ', header=None)\n",
 "        if not os.path.isdir(out + '/' + cancer):\n",
 "            os.makedirs(out + '/' + cancer)\n",
 "        for g in files[1]:\n",
 "            p = f + g\n",
 "            o = out + '/' + cancer + '/' + g\n",
 "            # Argument list (no shell): URLs and paths are never re-parsed by a shell.\n",
 "            # --fail turns HTTP errors into a non-zero exit instead of saving the error page as data.\n",
 "            rc = subprocess.call(['curl', '--fail', '--silent', '--show-error',\n",
 "                                  '--cacert', PATH_TO_CACERT, '-o', o, p])\n",
 "            if rc != 0:\n",
 "                failed.append((cancer, 'curl exit code {} for {}'.format(rc, g)))\n",
 "    except Exception as e:\n",
 "        # Keep going with the next cancer, but remember what went wrong.\n",
 "        failed.append((cancer, repr(e)))\n",
 "clear_output()\n",
 "for cancer, reason in failed:\n",
 "    print('FAIL: {}: {}'.format(cancer, reason))\n",
 "if failed:\n",
 "    print('FAIL: Make sure path to cacert.pem is set!')"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 },
{ "cell_type": "code", "collapsed": false, "input": [
 "# Drop everything up to the first underscore from downloaded file names that contain 'nationwidechildrens'.\n",
 "# Dedicated loop names keep the global `path` (download base URL) and `f` untouched,\n",
 "# so the download cell can be re-run after this one.\n",
 "for cancer_dir in os.listdir(out):\n",
 "    cancer_path = out + '/' + cancer_dir + '/'\n",
 "    if not os.path.isdir(cancer_path):\n",
 "        continue  # skip stray files sitting next to the per-cancer directories\n",
 "    for fname in os.listdir(cancer_path):\n",
 "        if 'nationwidechildrens' in fname:\n",
 "            os.rename(cancer_path + fname, cancer_path + '_'.join(fname.split('_')[1:]))"
 ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 },
{ "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 } ], "metadata": {} } ] }