{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting google-cloud-bigquery\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/72/e1/1ae3f8024e1d011bc567d54ec81e8c9afd08d107a326bd109e578475415d/google_cloud_bigquery-1.6.0-py2.py3-none-any.whl (83kB)\n", "\u001b[K 100% |████████████████████████████████| 92kB 4.7MB/s ta 0:00:011\n", "\u001b[?25hCollecting google-api-core<2.0.0dev,>=1.0.0 (from google-cloud-bigquery)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/e4/3a/f9a5746a4d1c03e4ae6d4fdea0b4275cd80320dde7b2a44439cf9e913e33/google_api_core-1.5.0-py2.py3-none-any.whl (62kB)\n", "\u001b[K 100% |████████████████████████████████| 71kB 8.9MB/s ta 0:00:011\n", "\u001b[?25hCollecting google-resumable-media>=0.2.1 (from google-cloud-bigquery)\n", " Downloading https://files.pythonhosted.org/packages/77/95/2e4020a54366423ddba715f89fb7ca456c8f048b15cada6cd6a54cf10e8c/google_resumable_media-0.3.1-py2.py3-none-any.whl\n", "Collecting google-cloud-core<0.29dev,>=0.28.0 (from google-cloud-bigquery)\n", " Downloading https://files.pythonhosted.org/packages/0f/41/ae2418b4003a14cf21c1c46d61d1b044bf02cf0f8f91598af572b9216515/google_cloud_core-0.28.1-py2.py3-none-any.whl\n", "Requirement not upgraded as not directly required: googleapis-common-protos<2.0dev,>=1.5.3 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (1.5.3)\n", "Requirement not upgraded as not directly required: six>=1.10.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (1.11.0)\n", "Requirement not upgraded as not directly required: requests<3.0.0dev,>=2.18.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (2.18.4)\n", "Requirement not upgraded as not directly required: pytz in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (2018.5)\n", "Collecting google-auth<2.0.0dev,>=0.4.0 (from google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery)\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/58/cb/96dbb4e50e7a9d856e89cc9c8e36ab1055f9774f7d85f37e2156c1d79d9f/google_auth-1.5.1-py2.py3-none-any.whl (65kB)\n", "\u001b[K 100% |████████████████████████████████| 71kB 7.5MB/s ta 0:00:011\n", "\u001b[?25hRequirement not upgraded as not directly required: setuptools>=34.0.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (39.2.0)\n", "Requirement not upgraded as not directly required: protobuf>=3.4.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (3.6.1)\n", "Requirement not upgraded as not directly required: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.6/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (3.0.4)\n", "Requirement not upgraded as not directly required: idna<2.7,>=2.5 in /opt/conda/lib/python3.6/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (2.6)\n", "Requirement not upgraded as not directly required: urllib3<1.23,>=1.21.1 in /opt/conda/lib/python3.6/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (1.22)\n", "Requirement not upgraded as not directly required: certifi>=2017.4.17 in /opt/conda/lib/python3.6/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (2018.10.15)\n", "Collecting cachetools>=2.0.0 (from google-auth<2.0.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery)\n", " Downloading https://files.pythonhosted.org/packages/0a/58/cbee863250b31d80f47401d04f34038db6766f95dea1cc909ea099c7e571/cachetools-2.1.0-py2.py3-none-any.whl\n", "Requirement not upgraded as not directly required: rsa>=3.1.4 in /opt/conda/lib/python3.6/site-packages (from google-auth<2.0.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (4.0)\n", "Requirement not upgraded as not directly required: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.6/site-packages (from google-auth<2.0.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (0.2.2)\n", "Requirement not upgraded as not directly required: pyasn1>=0.1.3 in /opt/conda/lib/python3.6/site-packages (from rsa>=3.1.4->google-auth<2.0.0dev,>=0.4.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery) (0.4.4)\n", "\u001b[31mmkl-random 1.0.1 requires cython, which is not installed.\u001b[0m\n", "Installing collected packages: cachetools, google-auth, google-api-core, google-resumable-media, google-cloud-core, google-cloud-bigquery\n", "Successfully installed cachetools-2.1.0 google-api-core-1.5.0 google-auth-1.5.1 google-cloud-bigquery-1.6.0 google-cloud-core-0.28.1 google-resumable-media-0.3.1\n", "\u001b[33mYou are using pip version 10.0.1, however version 18.1 is available.\n", "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" ] } ], "source": [ "import sys\n", "!{sys.executable} -m pip install --upgrade google-cloud-bigquery" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: cython in /opt/conda/lib/python3.6/site-packages (0.29)\n", "Requirement already satisfied: pandas-gbq in /opt/conda/lib/python3.6/site-packages (0.7.0)\n", "Requirement already satisfied: pandas in /opt/conda/lib/python3.6/site-packages (from pandas-gbq) (0.23.4)\n", "Requirement already satisfied: google-auth in /opt/conda/lib/python3.6/site-packages (from pandas-gbq) (1.5.1)\n", "Requirement already satisfied: google-cloud-bigquery>=0.32.0 in /opt/conda/lib/python3.6/site-packages (from pandas-gbq) (1.6.0)\n", "Requirement already satisfied: google-auth-oauthlib in /opt/conda/lib/python3.6/site-packages (from pandas-gbq) (0.2.0)\n", "Requirement already satisfied: setuptools in /opt/conda/lib/python3.6/site-packages (from pandas-gbq) (39.2.0)\n", "Requirement already satisfied: python-dateutil>=2.5.0 in /opt/conda/lib/python3.6/site-packages (from pandas->pandas-gbq) (2.7.3)\n", "Requirement already satisfied: pytz>=2011k in /opt/conda/lib/python3.6/site-packages (from pandas->pandas-gbq) (2018.5)\n", "Requirement already satisfied: numpy>=1.9.0 in /opt/conda/lib/python3.6/site-packages (from pandas->pandas-gbq) (1.15.2)\n", "Requirement already satisfied: cachetools>=2.0.0 in /opt/conda/lib/python3.6/site-packages (from google-auth->pandas-gbq) (2.1.0)\n", "Requirement already satisfied: rsa>=3.1.4 in /opt/conda/lib/python3.6/site-packages (from google-auth->pandas-gbq) (4.0)\n", "Requirement already satisfied: pyasn1-modules>=0.2.1 in /opt/conda/lib/python3.6/site-packages (from google-auth->pandas-gbq) (0.2.2)\n", "Requirement already satisfied: six>=1.9.0 in /opt/conda/lib/python3.6/site-packages (from google-auth->pandas-gbq) (1.11.0)\n", "Requirement already satisfied: google-api-core<2.0.0dev,>=1.0.0 in /opt/conda/lib/python3.6/site-packages (from google-cloud-bigquery>=0.32.0->pandas-gbq) (1.5.0)\n", "Requirement already satisfied: google-resumable-media>=0.2.1 in /opt/conda/lib/python3.6/site-packages (from google-cloud-bigquery>=0.32.0->pandas-gbq) (0.3.1)\n", "Requirement already satisfied: google-cloud-core<0.29dev,>=0.28.0 in /opt/conda/lib/python3.6/site-packages (from google-cloud-bigquery>=0.32.0->pandas-gbq) (0.28.1)\n", "Requirement already satisfied: requests-oauthlib>=0.7.0 in /opt/conda/lib/python3.6/site-packages (from google-auth-oauthlib->pandas-gbq) (1.0.0)\n", "Requirement already satisfied: pyasn1>=0.1.3 in /opt/conda/lib/python3.6/site-packages (from rsa>=3.1.4->google-auth->pandas-gbq) (0.4.4)\n", "Requirement already satisfied: googleapis-common-protos<2.0dev,>=1.5.3 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery>=0.32.0->pandas-gbq) (1.5.3)\n", "Requirement already satisfied: requests<3.0.0dev,>=2.18.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery>=0.32.0->pandas-gbq) (2.18.4)\n", "Requirement already satisfied: protobuf>=3.4.0 in /opt/conda/lib/python3.6/site-packages (from google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery>=0.32.0->pandas-gbq) (3.6.1)\n", "Requirement already satisfied: oauthlib>=0.6.2 in /opt/conda/lib/python3.6/site-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib->pandas-gbq) (2.1.0)\n", "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /opt/conda/lib/python3.6/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery>=0.32.0->pandas-gbq) (3.0.4)\n", "Requirement already satisfied: idna<2.7,>=2.5 in /opt/conda/lib/python3.6/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery>=0.32.0->pandas-gbq) (2.6)\n", "Requirement already satisfied: urllib3<1.23,>=1.21.1 in /opt/conda/lib/python3.6/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery>=0.32.0->pandas-gbq) (1.22)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.6/site-packages (from requests<3.0.0dev,>=2.18.0->google-api-core<2.0.0dev,>=1.0.0->google-cloud-bigquery>=0.32.0->pandas-gbq) (2018.10.15)\n", "\u001b[33mYou are using pip version 10.0.1, however version 18.1 is available.\n", "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" ] } ], "source": [ "import sys\n", "!{sys.executable} -m pip install cython pandas-gbq" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from google.cloud import bigquery\n", "client = bigquery.Client()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "query_job = client.query(\"\"\"\n", "\tWITH\n", "\t table1 AS (\n", "\t SELECT\n", "\t project_short_name,\n", "\t case_barcode,\n", "\t IF (gender = 'FEMALE',\n", "\t 1,\n", "\t 0) AS F,\n", "\t IF (gender = 'MALE',\n", "\t 1,\n", "\t 0) AS M\n", "\t FROM\n", "\t `isb-cgc.TCGA_bioclin_v0.Clinical`\n", "\t GROUP BY\n", "\t project_short_name,\n", "\t case_barcode,\n", "\t gender)\n", "\t -- \n", "\t --\n", "\tSELECT\n", "\t project_short_name,\n", "\t SUM(M) AS M_count,\n", "\t SUM(F) AS F_count\n", "\tFROM\n", "\t table1\n", "\tGROUP BY\n", "\t project_short_name\n", "\t\"\"\")\n", "\n", "results = query_job.result()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TCGA-UCEC : 548 : 0\n", "TCGA-CESC : 307 : 0\n", "TCGA-OV : 587 : 0\n", "TCGA-UCS : 57 : 0\n", "TCGA-BRCA : 1085 : 12\n", "TCGA-CHOL : 25 : 20\n", "TCGA-DLBC : 26 : 22\n", "TCGA-STAD : 158 : 285\n", "TCGA-LGG : 230 : 285\n", "TCGA-ACC : 60 : 32\n", "TCGA-SKCM : 180 : 290\n", "TCGA-UVM : 35 : 45\n", "TCGA-BLCA : 108 : 304\n", "TCGA-KICH : 51 : 62\n", "TCGA-THYM : 60 : 64\n", "TCGA-MESO : 16 : 71\n", "TCGA-PCPG : 101 : 78\n", "TCGA-KIRC : 191 : 346\n", "TCGA-READ : 78 : 92\n", "TCGA-PAAD : 83 : 102\n", "TCGA-LAML : 91 : 109\n", "TCGA-GBM : 230 : 366\n", "TCGA-LUSC : 131 : 373\n", "TCGA-SARC : 142 : 119\n", "TCGA-HNSC : 142 : 386\n", "TCGA-TGCT : 0 : 134\n", "TCGA-THCA : 371 : 136\n", "TCGA-ESCA : 27 : 158\n", "TCGA-KIRP : 77 : 214\n", "TCGA-LUAD : 280 : 242\n", "TCGA-COAD : 216 : 243\n", "TCGA-PRAD : 0 : 500\n", "TCGA-LIHC : 122 : 255\n" ] } ], "source": [ "for row in results:\n", " print(\"{} : {} : {}\".format(row.project_short_name, row.F_count, row.M_count))" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "import pandas\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "projectid = \"isb-cgc-02-0001\"\n", "query = \"\"\"\n", "\tWITH\n", "\t table1 AS (\n", "\t SELECT\n", "\t project_short_name,\n", "\t case_barcode,\n", "\t IF (gender = 'FEMALE',\n", "\t 1,\n", "\t 0) AS F,\n", "\t IF (gender = 'MALE',\n", "\t 1,\n", "\t 0) AS M\n", "\t FROM\n", "\t `isb-cgc.TCGA_bioclin_v0.Clinical`\n", "\t GROUP BY\n", "\t project_short_name,\n", "\t case_barcode,\n", "\t gender)\n", "\t -- \n", "\t --\n", "\tSELECT\n", "\t project_short_name,\n", "\t SUM(M) AS M_count,\n", "\t SUM(F) AS F_count\n", "\tFROM\n", "\t table1\n", "\tGROUP BY\n", "\t project_short_name\n", "\t\"\"\"\n", "data_frame = pandas.read_gbq(query, project_id=projectid, dialect='standard')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(33, 3)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "data_frame.shape" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | project_short_name | \n", "M_count | \n", "F_count | \n", "
---|---|---|---|
0 | \n", "TCGA-UCEC | \n", "0 | \n", "548 | \n", "
1 | \n", "TCGA-CESC | \n", "0 | \n", "307 | \n", "
2 | \n", "TCGA-OV | \n", "0 | \n", "587 | \n", "
3 | \n", "TCGA-UCS | \n", "0 | \n", "57 | \n", "
4 | \n", "TCGA-BRCA | \n", "12 | \n", "1085 | \n", "
5 | \n", "TCGA-CHOL | \n", "20 | \n", "25 | \n", "
6 | \n", "TCGA-DLBC | \n", "22 | \n", "26 | \n", "
7 | \n", "TCGA-STAD | \n", "285 | \n", "158 | \n", "
8 | \n", "TCGA-LGG | \n", "285 | \n", "230 | \n", "
9 | \n", "TCGA-ACC | \n", "32 | \n", "60 | \n", "
10 | \n", "TCGA-SKCM | \n", "290 | \n", "180 | \n", "
11 | \n", "TCGA-UVM | \n", "45 | \n", "35 | \n", "
12 | \n", "TCGA-BLCA | \n", "304 | \n", "108 | \n", "
13 | \n", "TCGA-KICH | \n", "62 | \n", "51 | \n", "
14 | \n", "TCGA-THYM | \n", "64 | \n", "60 | \n", "
15 | \n", "TCGA-MESO | \n", "71 | \n", "16 | \n", "
16 | \n", "TCGA-PCPG | \n", "78 | \n", "101 | \n", "
17 | \n", "TCGA-KIRC | \n", "346 | \n", "191 | \n", "
18 | \n", "TCGA-READ | \n", "92 | \n", "78 | \n", "
19 | \n", "TCGA-PAAD | \n", "102 | \n", "83 | \n", "
20 | \n", "TCGA-LAML | \n", "109 | \n", "91 | \n", "
21 | \n", "TCGA-GBM | \n", "366 | \n", "230 | \n", "
22 | \n", "TCGA-LUSC | \n", "373 | \n", "131 | \n", "
23 | \n", "TCGA-SARC | \n", "119 | \n", "142 | \n", "
24 | \n", "TCGA-HNSC | \n", "386 | \n", "142 | \n", "
25 | \n", "TCGA-TGCT | \n", "134 | \n", "0 | \n", "
26 | \n", "TCGA-THCA | \n", "136 | \n", "371 | \n", "
27 | \n", "TCGA-ESCA | \n", "158 | \n", "27 | \n", "
28 | \n", "TCGA-KIRP | \n", "214 | \n", "77 | \n", "
29 | \n", "TCGA-LUAD | \n", "242 | \n", "280 | \n", "
30 | \n", "TCGA-COAD | \n", "243 | \n", "216 | \n", "
31 | \n", "TCGA-PRAD | \n", "500 | \n", "0 | \n", "
32 | \n", "TCGA-LIHC | \n", "255 | \n", "122 | \n", "