{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Download WXS raw somatic-mutation VCFs from GDC\n",
    "\n",
    "Selects the whole-exome (WXS) *Raw Simple Somatic Mutation* VCF files from a GDC file-metadata export and downloads each one with `gdc-client`.\n",
    "\n",
    "**Requirements**\n",
    "\n",
    "- the metadata export `files.2018-07-12.json` in the working directory\n",
    "- `gdc-client` available on `PATH`\n",
    "- a GDC user token file (see the configuration cell)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import subprocess\n",
    "\n",
    "import pandas as pd\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "The defaults are the paths used on the original cluster. Set the environment variables `GDC_OUT_DIR` and `GDC_TOKEN_FILE` to run the notebook elsewhere."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "metadata_json = 'files.2018-07-12.json'\n",
    "\n",
    "# Directory handed to `gdc-client download -d`.\n",
    "out_dir = os.environ.get('GDC_OUT_DIR', '/nrnb/users/btsui/Data/tcga_orig_vcf/')\n",
    "\n",
    "# NOTE: despite its name, token_dir is the path of the token *file* handed to `gdc-client download -t`.\n",
    "token_dir = os.environ.get(\n",
    "    'GDC_TOKEN_FILE',\n",
    "    '/cellar/users/ramarty/tokens/gdc-user-token.2018-06-25T22_21_40.089Z.txt',\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the GDC file metadata"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gdc_meta_df = pd.read_json(metadata_json)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Select the files to download\n",
    "\n",
    "Keep rows that are VCF files **and** come from WXS **and** have data type *Raw Simple Somatic Mutation*."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Author's note: filtering on the VCF data format alone gives 4240 files.\n",
    "m_vcf = gdc_meta_df['data_format'] == 'VCF'\n",
    "m_wxs = gdc_meta_df['experimental_strategy'] == 'WXS'\n",
    "m_somatic = gdc_meta_df['data_type'] == 'Raw Simple Somatic Mutation'\n",
    "\n",
    "gdc_meta_df_sub = gdc_meta_df[m_vcf & m_wxs & m_somatic]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Sum of the `file_size` column of the selection divided by 10^9 (i.e. GB, assuming `file_size` is in bytes). The output below was saved from the original session."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.331879807"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gdc_meta_df_sub['file_size'].sum()/10**9"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Download\n",
    "\n",
    "Each selected `file_id` is downloaded with one `gdc-client download` call. Exit codes are checked and the UUIDs of failed downloads are collected in `failed_uuids`, so failures are visible and can be retried.\n",
    "\n",
    "This is the slow step: the progress bar saved from the original run showed 1286 of 2120 files after 1 h 15 min (about 3.5 s per file)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def download_gdc_file(file_uuid, out_dir, token_path):\n",
    "    \"\"\"Download one GDC file with `gdc-client` and return the process exit code.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file_uuid : GDC file UUID to download.\n",
    "    out_dir : directory passed to `gdc-client download -d`.\n",
    "    token_path : path of the user token file passed to `gdc-client download -t`.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    int : exit code of `gdc-client` (0 means success).\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    FileNotFoundError : if the `gdc-client` executable cannot be found.\n",
    "    \"\"\"\n",
    "    # Argument list instead of a formatted shell string: nothing is interpreted by a shell.\n",
    "    gdc_cmd = ['gdc-client', 'download', '-t', token_path, '-d', out_dir, str(file_uuid)]\n",
    "    return subprocess.run(gdc_cmd).returncode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.makedirs(out_dir, exist_ok=True)\n",
    "\n",
    "failed_uuids = []\n",
    "for file_uuid in tqdm(gdc_meta_df_sub['file_id']):\n",
    "    if download_gdc_file(file_uuid, out_dir, token_dir) != 0:\n",
    "        failed_uuids.append(file_uuid)\n",
    "\n",
    "print('{} of {} downloads failed'.format(len(failed_uuids), len(gdc_meta_df_sub)))\n",
    "failed_uuids"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect the metadata and the download layout\n",
    "\n",
    "Most likely, the name of each downloaded folder matches the UUID of the file it contains (not yet verified).\n",
    "\n",
    "The first cell shows an example `cases` entry. The second lists a *different* download directory (`tcga_raw_lgg`, not `out_dir`), whose sub-folders are named by UUID. Both outputs were saved from the original session."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'project': {'project_id': 'TCGA-LGG'},\n",
       " 'case_id': '6a0bcf0c-fa4c-4119-99d2-f722b781d20f'}]"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "gdc_meta_df_sub['cases'].iloc[2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "total 21M\r\n",
      "drwxr-xr-x 3 btsui users 512 Jul 12 08:27 3a0e5ae0-dc79-468d-b459-a6d43b612851\r\n",
      "drwxr-xr-x 84 btsui users 128K Jul 12 08:23 .\r\n",
      "drwxr-xr-x 3 btsui users 128K Jul 12 08:22 ceb1a38c-fc22-4d27-9ada-553c1765f1f6\r\n",
      "drwxr-xr-x 13 btsui users 128K Jul 11 23:39 ..\r\n",
      "drwxr-xr-x 3 btsui users 512 Jul 10 06:58 fc837d52-5e38-4d1d-a953-48c321d60ce5\r\n",
      "drwxr-xr-x 3 btsui users 128K Jul 10 06:51 30e7af75-7d8e-4aa5-b01e-1149dff334ac\r\n",
      "drwxr-xr-x 3 btsui users 128K Jul 10 05:05 1c15ff7e-3cbc-41b1-b814-1cf03f1f5a27\r\n",
      "drwxr-xr-x 3 btsui users 128K Jul 10 04:59 97216c14-8db6-4a96-8df6-1e710f4a0ed3\r\n",
      "drwxr-xr-x 3 btsui users 128K Jul 10 04:58 2b1e29cb-e83c-4e25-a643-1ded3bbcccb6\r\n"
     ]
    }
   ],
   "source": [
    "!ls -lath /nrnb/users/btsui/Data/tcga_raw_lgg/ | head"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}