{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "#import pandas as pd" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "#pwd" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 551M\r\n", "drwxr-xr-x 3 btsui users 49 Jul 11 19:15 .\r\n", "-rw-r--r-- 1 btsui users 22K Jul 11 19:15 MaskingGenomeWithSnp_human.ipynb\r\n", "drwxr-xr-x 2 btsui users 12 Jul 11 19:15 .ipynb_checkpoints\r\n", "-rw-r--r-- 1 btsui users 909K Jul 11 15:34 gdc_manifest.2018-07-11.txt\r\n", "-rw-r--r-- 1 btsui users 12K Jul 11 15:32 downloadTCGA_LGG.ipynb\r\n", "drwxr-xr-x 19 btsui users 24 Jul 8 09:29 ..\r\n", "-rw-r--r-- 1 btsui users 20K Jul 4 16:19 old_BuildReferenceWithMicrobes.ipynb\r\n", "-rw-r--r-- 1 btsui users 7.5K Jul 4 16:13 MergeViralAndBacterial.ipynb\r\n", "-rw-r--r-- 1 btsui users 50K Jul 4 14:41 MaskingGenomeWithSnp_Specie.ipynb\r\n", "-rw-r--r-- 1 btsui users 64K Jul 4 14:25 ParseBamReadCount_base_case.ipynb\r\n", "-rw-r--r-- 1 btsui users 29 Mar 2 16:19 tmp.out.100.bed\r\n", "-rw-r--r-- 1 btsui users 0 Mar 2 16:19 unMapped\r\n", "-rw-r--r-- 1 btsui users 31 Mar 2 16:19 tmp.bed\r\n", "-rw-r--r-- 1 btsui users 22K Mar 2 14:56 tmp.out.1.bed\r\n", "-rw-r--r-- 1 btsui users 23K Mar 2 14:55 tmp.out.bed\r\n", "prw-r--r-- 1 btsui users 0 Mar 2 10:50 pipe\r\n", "-rw-r--r-- 1 btsui users 780K Mar 2 10:49 complement.txt\r\n", "-rw-r--r-- 1 btsui users 266 Mar 2 10:49 genome\r\n", "-rw-r--r-- 1 btsui users 3.3M Mar 2 10:49 extracting_region.bed\r\n", "-rw-r--r-- 1 btsui users 2.5M Mar 1 11:25 10000.pickle.gz\r\n", "-rw-r--r-- 1 btsui users 2.4M Mar 1 11:25 0.pickle.gz\r\n", "-rw-r--r-- 1 btsui users 6.8M Jan 28 18:33 Pos_block_140700000\r\n", "-rw-r--r-- 1 btsui users 5.2M Jan 28 18:28 Pos_block_231700000\r\n", "-rw-r--r-- 1 btsui users 1.1K Jan 26 08:57 test.h5\r\n", "-rw-r--r-- 1 
btsui users 36K Jan 24 16:47 Untitled.ipynb\r\n", "-rw-r--r-- 1 btsui users 19K Jan 24 15:13 testOne.ipynb\r\n", "-rw-r--r-- 1 btsui users 128K Jan 24 15:11 TCGA_compare.alternative_allele.png\r\n", "-rw-r--r-- 1 btsui users 18K Jan 24 15:11 TCGA_compare.alternative_allele.pdf\r\n", "-rw-r--r-- 1 btsui users 124K Jan 24 15:10 TCGA_compare.png\r\n", "-rw-r--r-- 1 btsui users 19K Jan 24 15:10 TCGA_compare.pdf\r\n", "-rw-r--r-- 1 btsui users 4.5M Jan 24 14:11 tmp.tcga.txt.gz\r\n", "-rw-r--r-- 1 btsui users 163M Jan 3 2018 0.h5\r\n", "-rw-r--r-- 1 btsui users 85M Jan 3 2018 1000.pickle.gz\r\n", "-rw-r--r-- 1 btsui users 6.5K Jan 2 2018 GenerateEmptyPicklesForEachSpecies.ipynb\r\n", "-rw-r--r-- 1 btsui users 385K Jan 2 2018 tmp2.pickle.gz\r\n", "-rw-r--r-- 1 btsui users 400K Jan 2 2018 tmp.pickle.gz\r\n", "-rw-r--r-- 1 btsui users 2.3M Jan 2 2018 SRR349840.txt.snp.gz\r\n", "-rw-r--r-- 1 btsui users 188K Jan 2 2018 SRR349840_per_fa_record_stat.txt.gz\r\n", "-rw-r--r-- 1 btsui users 81M Dec 30 2017 microbe.fa.gz\r\n", "-rw-r--r-- 1 btsui users 86M Dec 30 2017 Homo_sapiens.fa.gz\r\n", "-rw-r--r-- 1 btsui users 25 Dec 30 2017 Homo_sapiens.GRCh38.dna_rm.toplevel.fa.gz\r\n", "-rw-r--r-- 1 btsui users 302 Dec 30 2017 grch38.genome\r\n", "-rw-r--r-- 1 btsui users 221M Dec 30 2017 Homo_sapiens.GRCh38.dna_rm.toplevel.SNP_masked.fa.gz\r\n", "-rw-r--r-- 1 btsui users 7.9M Dec 30 2017 test.bed\r\n", "-rw-r--r-- 1 btsui users 53M Dec 29 2017 microbe.fa\r\n", "-rw-r--r-- 1 btsui users 5.2K Dec 28 2017 single_snp.py\r\n", "-rw-r--r-- 1 btsui users 0 Dec 26 2017 untitled.txt\r\n", "-rw-r--r-- 1 btsui users 38K Dec 26 2017 gdc_manifest.2017-12-27T02_59_36.013442.txt\r\n", "-rw-r--r-- 1 btsui users 180K Dec 26 2017 gdc_manifest.2017-12-27T02_43_35.959399.txt\r\n" ] } ], "source": [ "#!ls -alth\n", "#gdc_manifest.2018-07-11.txt" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ 
"/cellar/users/btsui/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:20: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n" ] } ], "source": [ "from tqdm import tqdm\n", "\n", "import pandas as pd\n", "import os\n", "import subprocess\n", "\n", "CWD='/cellar/users/btsui/Project/METAMAP/notebook/RapMapTest/XGS_WGS/'\n", "os.chdir(CWD)\n", "#gdc_meta_df=pd.read_json('files.2017-12-09T19_29_39.496570.json')\n", "\n", "\n", "gdc_meta_df=pd.read_csv('gdc_manifest.2017-12-27T02_59_36.013442.txt',sep='\\t')\n", "\n", "tcgaMetaDf=gdc_meta_df[gdc_meta_df.filename.str.contains('TCGA-\\w+-\\w+-0')]\n", "\n", "brain_tcga_mut_df=pd.read_pickle('/cellar/users/btsui/Project/KangZhang/NB/clinicalCleaning/../interDataDir/tcga_mutation.pickle')\n", "#(brain_tcga_mut_df['Hugo_Symbol']=='IDH1')\n", "IDH1_mutated_df=brain_tcga_mut_df[\n", " (brain_tcga_mut_df.TSS=='LGG')]\n", "IDH1_mutated_df['pid']=IDH1_mutated_df['patient'].str.extract('TCGA-\\w+-(\\w+)')\n", "\n", "\n", "gdc_meta_df['pid']=gdc_meta_df.filename.str.extract('TCGA-\\w+-(\\w+)')\n", "\n", "gdc_meta_df['idh1_mutation_status']=gdc_meta_df.pid.isin(IDH1_mutated_df.pid)\n", "gdc_meta_df=gdc_meta_df.sort_values('idh1_mutation_status',ascending=False)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(252, 7)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gdc_meta_df.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TCGA-DB-5276-01A-01D-1465_130806_SN1440_0159_BC29Y3ACXX_s_5_rg.sorted.bam\r\n", "TCGA-DB-5276-01A-01D-1465_130806_SN1440_0159_BC29Y3ACXX_s_5_rg.sorted.bam.bai\r\n", 
"bcf9702a-8c53-4d3b-a9bf-06d4b8c12c72_analysis.xml\r\n", "bcf9702a-8c53-4d3b-a9bf-06d4b8c12c72_experiment.xml\r\n", "bcf9702a-8c53-4d3b-a9bf-06d4b8c12c72_run.xml\r\n", "logs\r\n" ] } ], "source": [ "!ls /nrnb/users/btsui/Data/tcga_raw_lgg/02206442-a052-4c44-a4b8-1467493df2eb" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "1it [00:03, 3.78s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ 9aa689c9-1b3c-45f1-8178-3f0b11ca5a11\n", "0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "2it [00:06, 3.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ e33fa79f-3d33-4f0b-81d9-705ab9c8a19c\n", "512\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "3it [00:09, 3.01s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ 336bd48f-3c20-4a2f-a2ef-38bb94e30e11\n", "512\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "4it [00:11, 2.89s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ 550d6316-2067-4f67-ba64-a8687f3320b1\n", "512\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r", "5it [00:13, 2.76s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ 
# Download every file listed in `gdc_meta_df` with gdc-client, one UUID at a
# time, printing the command and its exit code for each file.

# Directory gdc-client writes into (passed as its -d argument).
out_dir = '/nrnb/users/btsui/Data/tcga_raw_lgg/'

# Path to the GDC token FILE (passed as -t). Set GDC_TOKEN_FILE to use your own
# token instead of the shared default. gdc-client warns when the token file is
# readable by other users; `chmod 600` it.
token_dir = os.environ.get(
    'GDC_TOKEN_FILE',
    '/cellar/users/ramarty/tokens/gdc-user-token.2018-06-25T22_21_40.089Z.txt',
)

# Human-readable form of the command, used for logging only.
gdc_cmd_fmt = 'gdc-client download -t {token_dir} -d {out_dir} {file_uuid}'

# UUIDs whose download exited non-zero, so failures are not lost in the log.
failed_uuids = []

# total= lets tqdm show progress out of the number of manifest rows instead of
# a bare iteration counter.
for _, rowS in tqdm(gdc_meta_df.iterrows(), total=len(gdc_meta_df)):
    file_uuid = str(rowS.loc['id'])
    gdc_cmd = gdc_cmd_fmt.format(out_dir=out_dir, file_uuid=file_uuid, token_dir=token_dir)
    print(gdc_cmd)

    # An argument list keeps manifest values away from a shell, and
    # `returncode` is the real exit status (os.system returned the raw wait
    # status, e.g. 512 for exit code 2). Raises FileNotFoundError if
    # gdc-client is not on PATH.
    result = subprocess.run(
        ['gdc-client', 'download', '-t', token_dir, '-d', out_dir, file_uuid]
    ).returncode
    print(result)

    if result != 0:
        failed_uuids.append(file_uuid)

if failed_uuids:
    print('{} of {} downloads failed:'.format(len(failed_uuids), len(gdc_meta_df)))
    for failed_uuid in failed_uuids:
        print('  ' + failed_uuid)
On Linux: chmod 600 /cellar/users/ramarty/tokens/gdc-user-token.2018-06-25T22_21_40.089Z.txt\n", " 6% [### ] ETA: 0:20:12 ] ETA: 0:23:18" ] } ], "source": [ "#!gdc-client download -t /cellar/users/ramarty/tokens/gdc-user-token.2018-06-25T22_21_40.089Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ e33fa79f-3d33-4f0b-81d9-705ab9c8a19c\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 3328\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 26 2017 02206442-a052-4c44-a4b8-1467493df2eb\r\n", "drwxr-xr-x 3 btsui users 131072 Jan 24 12:54 2b0048e0-a062-40d2-a1e1-4bb763ea0ead\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 26 2017 336bd48f-3c20-4a2f-a2ef-38bb94e30e11\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 27 2017 52ae2dd2-f573-41c6-ad1a-18b19c9eea35\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 26 2017 550d6316-2067-4f67-ba64-a8687f3320b1\r\n", "drwxr-xr-x 3 btsui users 512 Dec 27 2017 6f5b793c-9040-4fd7-8b32-2fe33bc8c7d2\r\n", "drwxr-xr-x 3 btsui users 512 Dec 27 2017 781639a0-ea42-4e90-8e48-2dd0de69143f\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 26 2017 8013ce94-6e62-4d7f-b834-fb13d709a080\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 26 2017 9aa689c9-1b3c-45f1-8178-3f0b11ca5a11\r\n", "drwxr-xr-x 3 btsui users 512 Jan 4 17:27 a0e6878e-0862-4b54-897e-2a5a4fb7df86\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 26 2017 ab6bd849-b8ca-4864-b133-72a3aa82c923\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 26 2017 b404ac67-1c7f-4b01-8038-7432d3d6e489\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 26 2017 d987c181-1947-486c-957f-c5bd782a1eba\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 27 2017 e33fa79f-3d33-4f0b-81d9-705ab9c8a19c\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 26 2017 ed7bb33a-0d4e-4215-b4c5-2dc1250d882b\r\n", "drwxr-xr-x 3 btsui users 131072 Dec 26 2017 f441e949-5e27-4235-a0db-39c77aacdb5c\r\n" ] } ], "source": [ "!ls -l /nrnb/users/btsui/Data/tcga_raw/" ] }, { "cell_type": "code", "execution_count": 4, 
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ 8b974222-8a7a-43ee-8a1b-20e511f2fe68\n" ] } ], "source": [ "print gdc_cmd" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "gdc-client download -t /cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt -d /nrnb/users/btsui/Data/tcga_raw/ f7c8fa51-dedd-47b0-82c2-0a87a4c7aad3" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "\"\"\"\n", "\n", "out_dir='/nrnb/users/btsui/Data/Wei/'\n", "\n", "gdc_meta_df.cases.iloc[0]\n", "\n", "token_dir='/cellar/users/btsui/../hcarter/gdc-user-token.2017-12-11T21_35_55.818Z.txt'\n", "gdc_cmd_fmt='gdc-client download -t {token_dir} -d {out_dir} {file_uuid}'\n", "\n", "for _,rowS in gdc_meta_df.iterrows():\n", " file_uuid=rowS.loc['file_id']\n", " gdc_cmd=gdc_cmd_fmt.format(out_dir=out_dir,file_uuid=file_uuid,token_dir=token_dir)\n", " os.system(gdc_cmd)\n", " \n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "v1.2.0\r\n" ] } ], "source": [ "!gdc-client --version" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "#!head /nrnb/data/controlled/2017_TCGA_genotypes/TCGA_all.map" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, 
"nbformat_minor": 1 }