{ "cells": [ { "cell_type": "markdown", "id": "facial-pipeline", "metadata": {}, "source": [ "# Tutorial 6: CPTAC basics review\n", "\n", "\n", "First, we'll import the package." ] }, { "cell_type": "code", "execution_count": 1, "id": "dress-rebate", "metadata": {}, "outputs": [], "source": [ "import cptac" ] }, { "cell_type": "markdown", "id": "bb3d6f6b", "metadata": {}, "source": [ "We can list which cancers we have data for." ] }, { "cell_type": "code", "execution_count": 2, "id": "94d73be3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th></th>\n", " <th>Datatype</th>\n", " </tr>\n", " <tr>\n", " <th>Cancer</th>\n", " <th>Source</th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th rowspan=\"3\" valign=\"top\">all_cancers</th>\n", " <th>harmonized</th>\n", " <td>[somatic_mutation, ancestry_prediction]</td>\n", " </tr>\n", " <tr>\n", " <th>mssm</th>\n", " <td>[clinical]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " <td>[tumor_purity, hla_typing]</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">brca</th>\n", " <th>bcm</th>\n", " <td>[miRNA, phosphoproteomics, CNV, proteomics, tr...</td>\n", " </tr>\n", " <tr>\n", " <th>broad</th>\n", " <td>[transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th>umich</th>\n", " <td>[proteomics, acetylproteomics, phosphoproteomics]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " <td>[CNV, cibersort, xcell, transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">ccrcc</th>\n", " <th>bcm</th>\n", " <td>[miRNA, phosphoproteomics, CNV, circular_RNA, 
...</td>\n", " </tr>\n", " <tr>\n", " <th>broad</th>\n", " <td>[transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th>umich</th>\n", " <td>[proteomics, phosphoproteomics]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " <td>[total_miRNA, CNV, mature_miRNA, precursor_miR...</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">coad</th>\n", " <th>bcm</th>\n", " <td>[miRNA, phosphoproteomics, CNV, proteomics, tr...</td>\n", " </tr>\n", " <tr>\n", " <th>broad</th>\n", " <td>[transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th>umich</th>\n", " <td>[proteomics, phosphoproteomics]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " <td>[CNV, transcriptomics, xcell, somatic_mutation...</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">gbm</th>\n", " <th>bcm</th>\n", " <td>[miRNA, phosphoproteomics, CNV, circular_RNA, ...</td>\n", " </tr>\n", " <tr>\n", " <th>broad</th>\n", " <td>[transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th>umich</th>\n", " <td>[proteomics, acetylproteomics, phosphoproteomics]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " <td>[total_miRNA, CNV, mature_miRNA, precursor_miR...</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">hnscc</th>\n", " <th>bcm</th>\n", " <td>[miRNA, phosphoproteomics, circular_RNA, prote...</td>\n", " </tr>\n", " <tr>\n", " <th>broad</th>\n", " <td>[transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th>umich</th>\n", " <td>[proteomics, phosphoproteomics]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " <td>[total_miRNA, CNV, mature_miRNA, precursor_miR...</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">lscc</th>\n", " <th>bcm</th>\n", " <td>[miRNA, phosphoproteomics, CNV, circular_RNA, ...</td>\n", " </tr>\n", " <tr>\n", " <th>broad</th>\n", " <td>[transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th>umich</th>\n", " <td>[proteomics, acetylproteomics, phosphoproteomics]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " 
<td>[total_miRNA, CNV, mature_miRNA, precursor_miR...</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">luad</th>\n", " <th>bcm</th>\n", " <td>[miRNA, phosphoproteomics, CNV, circular_RNA, ...</td>\n", " </tr>\n", " <tr>\n", " <th>broad</th>\n", " <td>[transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th>umich</th>\n", " <td>[proteomics, acetylproteomics, phosphoproteomics]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " <td>[total_miRNA, CNV, mature_miRNA, precursor_miR...</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">ov</th>\n", " <th>bcm</th>\n", " <td>[CNV, proteomics, transcriptomics, phosphoprot...</td>\n", " </tr>\n", " <tr>\n", " <th>broad</th>\n", " <td>[transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th>umich</th>\n", " <td>[proteomics]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " <td>[CNV, transcriptomics, xcell, somatic_mutation...</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">pdac</th>\n", " <th>bcm</th>\n", " <td>[miRNA, phosphoproteomics, CNV, circular_RNA, ...</td>\n", " </tr>\n", " <tr>\n", " <th>broad</th>\n", " <td>[transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th>umich</th>\n", " <td>[proteomics, phosphoproteomics]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " <td>[total_miRNA, CNV, mature_miRNA, precursor_miR...</td>\n", " </tr>\n", " <tr>\n", " <th rowspan=\"4\" valign=\"top\">ucec</th>\n", " <th>bcm</th>\n", " <td>[miRNA, phosphoproteomics, CNV, circular_RNA, ...</td>\n", " </tr>\n", " <tr>\n", " <th>broad</th>\n", " <td>[transcriptomics]</td>\n", " </tr>\n", " <tr>\n", " <th>umich</th>\n", " <td>[proteomics, acetylproteomics, phosphoproteomics]</td>\n", " </tr>\n", " <tr>\n", " <th>washu</th>\n", " <td>[total_miRNA, CNV, mature_miRNA, precursor_miR...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Datatype\n", "Cancer Source \n", "all_cancers harmonized [somatic_mutation, ancestry_prediction]\n", " mssm 
[clinical]\n", " washu [tumor_purity, hla_typing]\n", "brca bcm [miRNA, phosphoproteomics, CNV, proteomics, tr...\n", " broad [transcriptomics]\n", " umich [proteomics, acetylproteomics, phosphoproteomics]\n", " washu [CNV, cibersort, xcell, transcriptomics]\n", "ccrcc bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "coad bcm [miRNA, phosphoproteomics, CNV, proteomics, tr...\n", " broad [transcriptomics]\n", " umich [proteomics, phosphoproteomics]\n", " washu [CNV, transcriptomics, xcell, somatic_mutation...\n", "gbm bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, acetylproteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "hnscc bcm [miRNA, phosphoproteomics, circular_RNA, prote...\n", " broad [transcriptomics]\n", " umich [proteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "lscc bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, acetylproteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "luad bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, acetylproteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "ov bcm [CNV, proteomics, transcriptomics, phosphoprot...\n", " broad [transcriptomics]\n", " umich [proteomics]\n", " washu [CNV, transcriptomics, xcell, somatic_mutation...\n", "pdac bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "ucec bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich 
[proteomics, acetylproteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR..." ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cptac.get_cancer_options()" ] }, { "cell_type": "markdown", "id": "metropolitan-stranger", "metadata": {}, "source": [ "## Load the BRCA dataset" ] }, { "cell_type": "code", "execution_count": 3, "id": "compliant-rachel", "metadata": {}, "outputs": [], "source": [ "br = cptac.Brca()" ] }, { "cell_type": "markdown", "id": "05f6c2ae", "metadata": {}, "source": [ "We can list which data types are available from which sources." ] }, { "cell_type": "code", "execution_count": 4, "id": "dce3f8b3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Data type</th>\n", " <th>Available sources</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>CNV</td>\n", " <td>[bcm, washu]</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>miRNA</td>\n", " <td>[bcm]</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>phosphoproteomics</td>\n", " <td>[bcm, umich]</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>proteomics</td>\n", " <td>[bcm, umich]</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>transcriptomics</td>\n", " <td>[bcm, broad, washu]</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>ancestry_prediction</td>\n", " <td>[harmonized]</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>somatic_mutation</td>\n", " <td>[harmonized, washu]</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>clinical</td>\n", " <td>[mssm]</td>\n", " 
</tr>\n", " <tr>\n", " <th>8</th>\n", " <td>follow-up</td>\n", " <td>[mssm]</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>medical_history</td>\n", " <td>[mssm]</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>acetylproteomics</td>\n", " <td>[umich]</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>cibersort</td>\n", " <td>[washu]</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>hla_typing</td>\n", " <td>[washu]</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>tumor_purity</td>\n", " <td>[washu]</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>xcell</td>\n", " <td>[washu]</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Data type Available sources\n", "0 CNV [bcm, washu]\n", "1 miRNA [bcm]\n", "2 phosphoproteomics [bcm, umich]\n", "3 proteomics [bcm, umich]\n", "4 transcriptomics [bcm, broad, washu]\n", "5 ancestry_prediction [harmonized]\n", "6 somatic_mutation [harmonized, washu]\n", "7 clinical [mssm]\n", "8 follow-up [mssm]\n", "9 medical_history [mssm]\n", "10 acetylproteomics [umich]\n", "11 cibersort [washu]\n", "12 hla_typing [washu]\n", "13 tumor_purity [washu]\n", "14 xcell [washu]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "br.list_data_sources()" ] }, { "cell_type": "markdown", "id": "distributed-impossible", "metadata": {}, "source": [ "## Download\n", "\n", "Each file will be automatically downloaded when requested, but authentication through your Box account is required to download pancan data.\n", "\n", "See the end of this tutorial for how to download files on a remote computer that doesn't have a web browser for logging into Box." ] }, { "cell_type": "markdown", "id": "ebaaa91f", "metadata": {}, "source": [ "Let's get some data tables." 
] }, { "cell_type": "code", "execution_count": 5, "id": "gorgeous-extraction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th>Name</th>\n", " <th>tumor_code</th>\n", " <th>discovery_study</th>\n", " <th>type_of_analyzed_samples</th>\n", " <th>confirmatory_study</th>\n", " <th>type_of_analyzed_samples</th>\n", " <th>age</th>\n", " <th>sex</th>\n", " <th>race</th>\n", " <th>ethnicity</th>\n", " <th>ethnicity_race_ancestry_identified</th>\n", " <th>...</th>\n", " <th>additional_treatment_pharmaceutical_therapy_for_new_tumor</th>\n", " <th>additional_treatment_immuno_for_new_tumor</th>\n", " <th>number_of_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor_event_loco-regional</th>\n", " <th>number_of_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor_event_metastasis</th>\n", " <th>Recurrence-free survival, days</th>\n", " <th>Recurrence-free survival from collection, days</th>\n", " <th>Recurrence status (1, yes; 0, no)</th>\n", " <th>Overall survival, days</th>\n", " <th>Overall survival from collection, days</th>\n", " <th>Survival status (1, dead; 0, alive)</th>\n", " </tr>\n", " <tr>\n", " <th>Patient_ID</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " 
<th>01BR001</th>\n", " <td>BR</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>55</td>\n", " <td>Female</td>\n", " <td>Black or African American</td>\n", " <td>Not Hispanic or Latino</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>421.0</td>\n", " <td>NaN</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>01BR008</th>\n", " <td>BR</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>48</td>\n", " <td>Female</td>\n", " <td>Black or African American</td>\n", " <td>Not Hispanic or Latino</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>01BR009</th>\n", " <td>BR</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>64</td>\n", " <td>Female</td>\n", " <td>Black or African American</td>\n", " <td>Not Hispanic or Latino</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>01BR010</th>\n", " <td>BR</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>65</td>\n", " <td>Female</td>\n", " <td>Black or African American</td>\n", " <td>Not Hispanic or Latino</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>01BR015</th>\n", " <td>BR</td>\n", " <td>Yes</td>\n", 
" <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>35</td>\n", " <td>Female</td>\n", " <td>White</td>\n", " <td>Not Hispanic or Latino</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>347.0</td>\n", " <td>NaN</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>21BR010</th>\n", " <td>BR</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>71</td>\n", " <td>Female</td>\n", " <td>White</td>\n", " <td>Hispanic or Latino</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>327.0</td>\n", " <td>NaN</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>22BR003</th>\n", " <td>BR</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>30</td>\n", " <td>Female</td>\n", " <td>White</td>\n", " <td>Not Hispanic or Latino</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>22BR005</th>\n", " <td>BR</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>46</td>\n", " <td>Female</td>\n", " <td>White</td>\n", " <td>Not Hispanic or Latino</td>\n", " 
<td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>348.0</td>\n", " <td>NaN</td>\n", " <td>0.0</td>\n", " </tr>\n", " <tr>\n", " <th>22BR006</th>\n", " <td>BR</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>55</td>\n", " <td>Female</td>\n", " <td>Black or African American</td>\n", " <td>Not Hispanic or Latino</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>282.0</td>\n", " <td>NaN</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>604</th>\n", " <td>BR</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>41</td>\n", " <td>Female</td>\n", " <td>Black or African American</td>\n", " <td>Unknown</td>\n", " <td>NaN</td>\n", " <td>...</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>134 rows × 124 columns</p>\n", "</div>" ], "text/plain": [ "Name tumor_code discovery_study type_of_analyzed_samples \\\n", "Patient_ID \n", "01BR001 BR Yes NaN \n", "01BR008 BR Yes NaN \n", "01BR009 BR Yes NaN \n", "01BR010 BR Yes NaN \n", "01BR015 BR Yes NaN \n", "... ... ... ... \n", "21BR010 BR Yes NaN \n", "22BR003 BR Yes NaN \n", "22BR005 BR Yes NaN \n", "22BR006 BR Yes NaN \n", "604 BR Yes NaN \n", "\n", "Name confirmatory_study type_of_analyzed_samples age sex \\\n", "Patient_ID \n", "01BR001 NaN NaN 55 Female \n", "01BR008 NaN NaN 48 Female \n", "01BR009 NaN NaN 64 Female \n", "01BR010 NaN NaN 65 Female \n", "01BR015 NaN NaN 35 Female \n", "... ... ... .. ... 
\n", "21BR010 NaN NaN 71 Female \n", "22BR003 NaN NaN 30 Female \n", "22BR005 NaN NaN 46 Female \n", "22BR006 NaN NaN 55 Female \n", "604 NaN NaN 41 Female \n", "\n", "Name race ethnicity \\\n", "Patient_ID \n", "01BR001 Black or African American Not Hispanic or Latino \n", "01BR008 Black or African American Not Hispanic or Latino \n", "01BR009 Black or African American Not Hispanic or Latino \n", "01BR010 Black or African American Not Hispanic or Latino \n", "01BR015 White Not Hispanic or Latino \n", "... ... ... \n", "21BR010 White Hispanic or Latino \n", "22BR003 White Not Hispanic or Latino \n", "22BR005 White Not Hispanic or Latino \n", "22BR006 Black or African American Not Hispanic or Latino \n", "604 Black or African American Unknown \n", "\n", "Name ethnicity_race_ancestry_identified ... \\\n", "Patient_ID ... \n", "01BR001 NaN ... \n", "01BR008 NaN ... \n", "01BR009 NaN ... \n", "01BR010 NaN ... \n", "01BR015 NaN ... \n", "... ... ... \n", "21BR010 NaN ... \n", "22BR003 NaN ... \n", "22BR005 NaN ... \n", "22BR006 NaN ... \n", "604 NaN ... \n", "\n", "Name additional_treatment_pharmaceutical_therapy_for_new_tumor \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name additional_treatment_immuno_for_new_tumor \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name number_of_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor_event_loco-regional \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... 
\n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name number_of_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor_event_metastasis \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name Recurrence-free survival, days \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name Recurrence-free survival from collection, days \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name Recurrence status (1, yes; 0, no) Overall survival, days \\\n", "Patient_ID \n", "01BR001 0 421.0 \n", "01BR008 0 NaN \n", "01BR009 0 NaN \n", "01BR010 0 NaN \n", "01BR015 0 347.0 \n", "... ... ... \n", "21BR010 0 327.0 \n", "22BR003 0 NaN \n", "22BR005 0 348.0 \n", "22BR006 0 282.0 \n", "604 0 NaN \n", "\n", "Name Overall survival from collection, days \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name Survival status (1, dead; 0, alive) \n", "Patient_ID \n", "01BR001 0.0 \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 0.0 \n", "... ... 
\n", "21BR010 0.0 \n", "22BR003 NaN \n", "22BR005 0.0 \n", "22BR006 1.0 \n", "604 NaN \n", "\n", "[134 rows x 124 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "br.get_clinical(source=\"mssm\")\n", "# Note: the `source=` keyword is optional; calling br.get_clinical('mssm') works just as well." ] }, { "cell_type": "code", "execution_count": 6, "id": "accredited-hardwood", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "cptac warning: Your version of cptac (1.5.1) is out-of-date. Latest is 1.5.0. Please run 'pip install --upgrade cptac' to update it. (C:\\Users\\sabme\\anaconda3\\lib\\threading.py, line 910)\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th>Name</th>\n", " <th>Gene</th>\n", " <th>Mutation</th>\n", " <th>Location</th>\n", " <th>Entrez_Gene_Id</th>\n", " <th>NCBI_Build</th>\n", " <th>Chromosome</th>\n", " <th>Start_Position</th>\n", " <th>End_Position</th>\n", " <th>Strand</th>\n", " <th>Variant_Type</th>\n", " <th>...</th>\n", " <th>HGNC_UniProt_ID(supplied_by_UniProt)</th>\n", " <th>HGNC_Ensembl_ID(supplied_by_Ensembl)</th>\n", " <th>HGNC_UCSC_ID(supplied_by_UCSC)</th>\n", " <th>Oreganno_Build</th>\n", " <th>Simple_Uniprot_alt_uniprot_accessions</th>\n", " <th>dbSNP_TOPMED</th>\n", " <th>HGNC_Entrez_Gene_ID(supplied_by_NCBI)</th>\n", " <th>COHORT</th>\n", " <th>getz</th>\n", " <th>washu</th>\n", " </tr>\n", " <tr>\n", " <th>Patient_ID</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " 
<th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>01BR001</th>\n", " <td>CCDC136</td>\n", " <td>Intron</td>\n", " <td>NaN</td>\n", " <td>64753.0</td>\n", " <td>hg38</td>\n", " <td>chr7</td>\n", " <td>128815741</td>\n", " <td>128815741</td>\n", " <td>+</td>\n", " <td>SNP</td>\n", " <td>...</td>\n", " <td>Q96JN2</td>\n", " <td>ENSG00000128596</td>\n", " <td>uc003vnv.3</td>\n", " <td>NaN</td>\n", " <td>A4D1K1|A7MCY7|A8MYA7|Q6ZVK7|Q9H8M3|Q9UFE1</td>\n", " <td>NaN</td>\n", " <td>64753.0</td>\n", " <td>BRCA</td>\n", " <td>True</td>\n", " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>01BR001</th>\n", " <td>MYBPC1</td>\n", " <td>Splice_Site</td>\n", " <td>NaN</td>\n", " <td>4604.0</td>\n", " <td>hg38</td>\n", " <td>chr12</td>\n", " <td>101661264</td>\n", " <td>101661264</td>\n", " <td>+</td>\n", " <td>SNP</td>\n", " <td>...</td>\n", " <td>Q00872</td>\n", " <td>ENSG00000196091</td>\n", " <td>uc001tih.4</td>\n", " <td>NaN</td>\n", " <td>B4DKR5|B7Z8G8|B7ZL02|B7ZL09|B7ZL10|E7ESM5|E7EW...</td>\n", " <td>NaN</td>\n", " <td>4604.0</td>\n", " <td>BRCA</td>\n", " <td>True</td>\n", " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>01BR001</th>\n", " <td>KRT77</td>\n", " <td>Silent</td>\n", " <td>p.G516G</td>\n", " <td>374454.0</td>\n", " <td>hg38</td>\n", " <td>chr12</td>\n", " <td>52691354</td>\n", " <td>52691354</td>\n", " <td>+</td>\n", " <td>SNP</td>\n", " <td>...</td>\n", " <td>Q7Z794</td>\n", " <td>ENSG00000189182</td>\n", " <td>uc001saw.4</td>\n", " <td>NaN</td>\n", " <td>Q7RTS8</td>\n", " <td>NaN</td>\n", " <td>374454.0</td>\n", " <td>BRCA</td>\n", " <td>True</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>01BR001</th>\n", " <td>TENM4</td>\n", " <td>Missense_Mutation</td>\n", " <td>p.E19K</td>\n", " <td>26011.0</td>\n", " <td>hg38</td>\n", " 
<td>chr11</td>\n", " <td>79069890</td>\n", " <td>79069890</td>\n", " <td>+</td>\n", " <td>SNP</td>\n", " <td>...</td>\n", " <td>Q6N022</td>\n", " <td>ENSG00000149256</td>\n", " <td>uc001ozl.5</td>\n", " <td>hg38</td>\n", " <td>A6ND26|Q7Z3C7|Q96MS6|Q9P2P4|Q9Y4S2</td>\n", " <td>NaN</td>\n", " <td>26011.0</td>\n", " <td>BRCA</td>\n", " <td>True</td>\n", " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>01BR001</th>\n", " <td>PPFIA1</td>\n", " <td>Missense_Mutation</td>\n", " <td>p.H795L</td>\n", " <td>8500.0</td>\n", " <td>hg38</td>\n", " <td>chr11</td>\n", " <td>70355707</td>\n", " <td>70355707</td>\n", " <td>+</td>\n", " <td>SNP</td>\n", " <td>...</td>\n", " <td>Q13136</td>\n", " <td>ENSG00000131626</td>\n", " <td>uc001opo.4</td>\n", " <td>NaN</td>\n", " <td>A6NLE3|Q13135|Q14567|Q8N4I2</td>\n", " <td>NaN</td>\n", " <td>8500.0</td>\n", " <td>BRCA</td>\n", " <td>True</td>\n", " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>604</th>\n", " <td>EXPH5</td>\n", " <td>Silent</td>\n", " <td>p.Q1690Q</td>\n", " <td>23086.0</td>\n", " <td>hg38</td>\n", " <td>chr11</td>\n", " <td>108510437</td>\n", " <td>108510437</td>\n", " <td>+</td>\n", " <td>SNP</td>\n", " <td>...</td>\n", " <td>Q8NEV8</td>\n", " <td>ENSG00000110723</td>\n", " <td>uc001pkk.3</td>\n", " <td>NaN</td>\n", " <td>Q2KHM1|Q9Y4D6</td>\n", " <td>0.99998407237512742,0.00001592762487257</td>\n", " <td>23086.0</td>\n", " <td>BRCA</td>\n", " <td>True</td>\n", " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>604</th>\n", " <td>IGSF9B</td>\n", " <td>Missense_Mutation</td>\n", " 
<td>p.V493M</td>\n", " <td>22997.0</td>\n", " <td>hg38</td>\n", " <td>chr11</td>\n", " <td>133931026</td>\n", " <td>133931026</td>\n", " <td>+</td>\n", " <td>SNP</td>\n", " <td>...</td>\n", " <td>Q9UPX0</td>\n", " <td>ENSG00000080854</td>\n", " <td>uc031qfh.2</td>\n", " <td>hg38</td>\n", " <td>G5EA26</td>\n", " <td>NaN</td>\n", " <td>22997.0</td>\n", " <td>BRCA</td>\n", " <td>True</td>\n", " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>604</th>\n", " <td>ANO2</td>\n", " <td>Intron</td>\n", " <td>NaN</td>\n", " <td>57101.0</td>\n", " <td>hg38</td>\n", " <td>chr12</td>\n", " <td>5827750</td>\n", " <td>5827750</td>\n", " <td>+</td>\n", " <td>SNP</td>\n", " <td>...</td>\n", " <td>Q9NQ90</td>\n", " <td>ENSG00000047617</td>\n", " <td>uc058kbl.1</td>\n", " <td>NaN</td>\n", " <td>C4N787|Q9H847</td>\n", " <td>0.99998407237512742,.,0.00001592762487257</td>\n", " <td>57101.0</td>\n", " <td>BRCA</td>\n", " <td>True</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>604</th>\n", " <td>HFM1</td>\n", " <td>Silent</td>\n", " <td>p.G133G</td>\n", " <td>164045.0</td>\n", " <td>hg38</td>\n", " <td>chr1</td>\n", " <td>91394188</td>\n", " <td>91394188</td>\n", " <td>+</td>\n", " <td>SNP</td>\n", " <td>...</td>\n", " <td>A2PYH4</td>\n", " <td>ENSG00000162669</td>\n", " <td>uc001doa.4</td>\n", " <td>NaN</td>\n", " <td>B1B0B6|Q8N9Q0</td>\n", " <td>NaN</td>\n", " <td>164045.0</td>\n", " <td>BRCA</td>\n", " <td>True</td>\n", " <td>True</td>\n", " </tr>\n", " <tr>\n", " <th>604</th>\n", " <td>SLC44A2</td>\n", " <td>Frame_Shift_Del</td>\n", " <td>p.E435fs</td>\n", " <td>57153.0</td>\n", " <td>hg38</td>\n", " <td>chr19</td>\n", " <td>10636388</td>\n", " <td>10636407</td>\n", " <td>+</td>\n", " <td>DEL</td>\n", " <td>...</td>\n", " <td>Q8IWA5</td>\n", " <td>ENSG00000129353</td>\n", " <td>uc002mpf.4</td>\n", " <td>NaN</td>\n", " <td>B2RBB1|B3KNH3|B4DFJ0|F2Q9D7|Q658V1|Q658Z2|Q6PJ...</td>\n", " <td>NaN</td>\n", " <td>57153.0</td>\n", " <td>BRCA</td>\n", " <td>True</td>\n", " 
<td>True</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>29017 rows × 131 columns</p>\n", "</div>" ], "text/plain": [ "Name Gene Mutation Location Entrez_Gene_Id NCBI_Build \\\n", "Patient_ID \n", "01BR001 CCDC136 Intron NaN 64753.0 hg38 \n", "01BR001 MYBPC1 Splice_Site NaN 4604.0 hg38 \n", "01BR001 KRT77 Silent p.G516G 374454.0 hg38 \n", "01BR001 TENM4 Missense_Mutation p.E19K 26011.0 hg38 \n", "01BR001 PPFIA1 Missense_Mutation p.H795L 8500.0 hg38 \n", "... ... ... ... ... ... \n", "604 EXPH5 Silent p.Q1690Q 23086.0 hg38 \n", "604 IGSF9B Missense_Mutation p.V493M 22997.0 hg38 \n", "604 ANO2 Intron NaN 57101.0 hg38 \n", "604 HFM1 Silent p.G133G 164045.0 hg38 \n", "604 SLC44A2 Frame_Shift_Del p.E435fs 57153.0 hg38 \n", "\n", "Name Chromosome Start_Position End_Position Strand Variant_Type ... \\\n", "Patient_ID ... \n", "01BR001 chr7 128815741 128815741 + SNP ... \n", "01BR001 chr12 101661264 101661264 + SNP ... \n", "01BR001 chr12 52691354 52691354 + SNP ... \n", "01BR001 chr11 79069890 79069890 + SNP ... \n", "01BR001 chr11 70355707 70355707 + SNP ... \n", "... ... ... ... ... ... ... \n", "604 chr11 108510437 108510437 + SNP ... \n", "604 chr11 133931026 133931026 + SNP ... \n", "604 chr12 5827750 5827750 + SNP ... \n", "604 chr1 91394188 91394188 + SNP ... \n", "604 chr19 10636388 10636407 + DEL ... \n", "\n", "Name HGNC_UniProt_ID(supplied_by_UniProt) \\\n", "Patient_ID \n", "01BR001 Q96JN2 \n", "01BR001 Q00872 \n", "01BR001 Q7Z794 \n", "01BR001 Q6N022 \n", "01BR001 Q13136 \n", "... ... \n", "604 Q8NEV8 \n", "604 Q9UPX0 \n", "604 Q9NQ90 \n", "604 A2PYH4 \n", "604 Q8IWA5 \n", "\n", "Name HGNC_Ensembl_ID(supplied_by_Ensembl) \\\n", "Patient_ID \n", "01BR001 ENSG00000128596 \n", "01BR001 ENSG00000196091 \n", "01BR001 ENSG00000189182 \n", "01BR001 ENSG00000149256 \n", "01BR001 ENSG00000131626 \n", "... ... 
\n", "604 ENSG00000110723 \n", "604 ENSG00000080854 \n", "604 ENSG00000047617 \n", "604 ENSG00000162669 \n", "604 ENSG00000129353 \n", "\n", "Name HGNC_UCSC_ID(supplied_by_UCSC) Oreganno_Build \\\n", "Patient_ID \n", "01BR001 uc003vnv.3 NaN \n", "01BR001 uc001tih.4 NaN \n", "01BR001 uc001saw.4 NaN \n", "01BR001 uc001ozl.5 hg38 \n", "01BR001 uc001opo.4 NaN \n", "... ... ... \n", "604 uc001pkk.3 NaN \n", "604 uc031qfh.2 hg38 \n", "604 uc058kbl.1 NaN \n", "604 uc001doa.4 NaN \n", "604 uc002mpf.4 NaN \n", "\n", "Name Simple_Uniprot_alt_uniprot_accessions \\\n", "Patient_ID \n", "01BR001 A4D1K1|A7MCY7|A8MYA7|Q6ZVK7|Q9H8M3|Q9UFE1 \n", "01BR001 B4DKR5|B7Z8G8|B7ZL02|B7ZL09|B7ZL10|E7ESM5|E7EW... \n", "01BR001 Q7RTS8 \n", "01BR001 A6ND26|Q7Z3C7|Q96MS6|Q9P2P4|Q9Y4S2 \n", "01BR001 A6NLE3|Q13135|Q14567|Q8N4I2 \n", "... ... \n", "604 Q2KHM1|Q9Y4D6 \n", "604 G5EA26 \n", "604 C4N787|Q9H847 \n", "604 B1B0B6|Q8N9Q0 \n", "604 B2RBB1|B3KNH3|B4DFJ0|F2Q9D7|Q658V1|Q658Z2|Q6PJ... \n", "\n", "Name dbSNP_TOPMED \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR001 NaN \n", "01BR001 NaN \n", "01BR001 NaN \n", "01BR001 NaN \n", "... ... \n", "604 0.99998407237512742,0.00001592762487257 \n", "604 NaN \n", "604 0.99998407237512742,.,0.00001592762487257 \n", "604 NaN \n", "604 NaN \n", "\n", "Name HGNC_Entrez_Gene_ID(supplied_by_NCBI) COHORT getz washu \n", "Patient_ID \n", "01BR001 64753.0 BRCA True True \n", "01BR001 4604.0 BRCA True True \n", "01BR001 374454.0 BRCA True NaN \n", "01BR001 26011.0 BRCA True True \n", "01BR001 8500.0 BRCA True True \n", "... ... ... ... ... 
\n", "604 23086.0 BRCA True True \n", "604 22997.0 BRCA True True \n", "604 57101.0 BRCA True NaN \n", "604 164045.0 BRCA True True \n", "604 57153.0 BRCA True True \n", "\n", "[29017 rows x 131 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "br.get_somatic_mutation('harmonized')" ] }, { "cell_type": "code", "execution_count": 7, "id": "clear-cliff", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead tr th {\n", " text-align: left;\n", " }\n", "\n", " .dataframe thead tr:last-of-type th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr>\n", " <th>Name</th>\n", " <th>ARF5</th>\n", " <th>M6PR</th>\n", " <th>ESRRA</th>\n", " <th>FKBP4</th>\n", " <th>NDUFAF7</th>\n", " <th>FUCA2</th>\n", " <th>DBNDD1</th>\n", " <th>SEMA3F</th>\n", " <th>CFTR</th>\n", " <th>CYP51A1</th>\n", " <th>...</th>\n", " <th>DDHD1</th>\n", " <th>WIZ</th>\n", " <th>GBF1</th>\n", " <th>APOA5</th>\n", " <th>WIZ</th>\n", " <th>LDB1</th>\n", " <th>WIZ</th>\n", " <th>RFX7</th>\n", " <th>SWSAP1</th>\n", " <th>SVIL</th>\n", " </tr>\n", " <tr>\n", " <th>Database_ID</th>\n", " <th>ENSP00000000233.5</th>\n", " <th>ENSP00000000412.3</th>\n", " <th>ENSP00000000442.6</th>\n", " <th>ENSP00000001008.4</th>\n", " <th>ENSP00000002125.4</th>\n", " <th>ENSP00000002165.5</th>\n", " <th>ENSP00000002501.6</th>\n", " <th>ENSP00000002829.3</th>\n", " <th>ENSP00000003084.6</th>\n", " <th>ENSP00000003100.8</th>\n", " <th>...</th>\n", " <th>ENSP00000500986.2</th>\n", " <th>ENSP00000500993.1</th>\n", " <th>ENSP00000501064.1</th>\n", " <th>ENSP00000501141.1</th>\n", " <th>ENSP00000501256.3</th>\n", " <th>ENSP00000501277.1</th>\n", " <th>ENSP00000501300.1</th>\n", " 
<th>ENSP00000501317.1</th>\n", " <th>ENSP00000501355.1</th>\n", " <th>ENSP00000501521.1</th>\n", " </tr>\n", " <tr>\n", " <th>Patient_ID</th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " <th></th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>01BR001</th>\n", " <td>0.012367</td>\n", " <td>-0.945999</td>\n", " <td>NaN</td>\n", " <td>-0.478170</td>\n", " <td>1.135840</td>\n", " <td>-0.512706</td>\n", " <td>0.750335</td>\n", " <td>-0.274824</td>\n", " <td>NaN</td>\n", " <td>-0.278244</td>\n", " <td>...</td>\n", " <td>-0.649127</td>\n", " <td>-0.580869</td>\n", " <td>-0.226667</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>-0.676185</td>\n", " <td>-0.068202</td>\n", " <td>-0.078207</td>\n", " <td>-0.328420</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>01BR008</th>\n", " <td>-0.514386</td>\n", " <td>0.462307</td>\n", " <td>0.230124</td>\n", " <td>-0.555968</td>\n", " <td>0.491366</td>\n", " <td>-0.656034</td>\n", " <td>-1.220890</td>\n", " <td>-0.369282</td>\n", " <td>-1.036441</td>\n", " <td>-0.059327</td>\n", " <td>...</td>\n", " <td>0.632221</td>\n", " <td>NaN</td>\n", " <td>0.032873</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>-0.015459</td>\n", " <td>0.227424</td>\n", " <td>0.325643</td>\n", " <td>-0.606240</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>01BR009</th>\n", " <td>-0.210782</td>\n", " <td>-0.085055</td>\n", " <td>0.380296</td>\n", " <td>-0.389491</td>\n", " <td>1.255391</td>\n", " <td>-0.608007</td>\n", " <td>-0.231318</td>\n", " <td>0.092870</td>\n", " <td>-1.505195</td>\n", " <td>0.206595</td>\n", " <td>...</td>\n", " <td>0.450818</td>\n", " <td>NaN</td>\n", " <td>-0.341503</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " 
<td>-0.220239</td>\n", " <td>0.125092</td>\n", " <td>0.365397</td>\n", " <td>-0.167392</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>01BR010</th>\n", " <td>0.105457</td>\n", " <td>0.351335</td>\n", " <td>-0.322798</td>\n", " <td>-0.821610</td>\n", " <td>0.241406</td>\n", " <td>-0.500140</td>\n", " <td>-0.137824</td>\n", " <td>0.113791</td>\n", " <td>NaN</td>\n", " <td>0.498314</td>\n", " <td>...</td>\n", " <td>-0.423470</td>\n", " <td>NaN</td>\n", " <td>0.360900</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>-0.451556</td>\n", " <td>-0.098897</td>\n", " <td>0.208643</td>\n", " <td>-0.729096</td>\n", " <td>0.670307</td>\n", " </tr>\n", " <tr>\n", " <th>01BR015</th>\n", " <td>-0.509298</td>\n", " <td>-0.874164</td>\n", " <td>NaN</td>\n", " <td>-0.113804</td>\n", " <td>-0.131347</td>\n", " <td>-0.412813</td>\n", " <td>0.262210</td>\n", " <td>0.042333</td>\n", " <td>NaN</td>\n", " <td>-0.657666</td>\n", " <td>...</td>\n", " <td>0.406016</td>\n", " <td>-0.493869</td>\n", " <td>-0.192847</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0.083639</td>\n", " <td>0.966976</td>\n", " <td>-0.012664</td>\n", " <td>0.081968</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>21BR010</th>\n", " <td>0.528298</td>\n", " <td>-0.127929</td>\n", " <td>-0.497360</td>\n", " <td>-0.151022</td>\n", " <td>0.082288</td>\n", " <td>0.447267</td>\n", " <td>0.151024</td>\n", " <td>0.220194</td>\n", " <td>0.516739</td>\n", " <td>-0.230357</td>\n", " <td>...</td>\n", " <td>-0.172151</td>\n", " <td>0.636608</td>\n", " <td>0.267400</td>\n", 
" <td>NaN</td>\n", " <td>-0.09507</td>\n", " <td>-0.017522</td>\n", " <td>-0.220463</td>\n", " <td>-0.067717</td>\n", " <td>-0.311446</td>\n", " <td>0.602422</td>\n", " </tr>\n", " <tr>\n", " <th>22BR005</th>\n", " <td>-0.549542</td>\n", " <td>0.134236</td>\n", " <td>NaN</td>\n", " <td>0.580773</td>\n", " <td>-0.080663</td>\n", " <td>-0.056509</td>\n", " <td>-0.148632</td>\n", " <td>0.260986</td>\n", " <td>NaN</td>\n", " <td>-0.348578</td>\n", " <td>...</td>\n", " <td>0.791937</td>\n", " <td>NaN</td>\n", " <td>0.171712</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0.083980</td>\n", " <td>-0.200083</td>\n", " <td>0.155198</td>\n", " <td>NaN</td>\n", " <td>0.456801</td>\n", " </tr>\n", " <tr>\n", " <th>22BR006</th>\n", " <td>0.336092</td>\n", " <td>0.125742</td>\n", " <td>NaN</td>\n", " <td>-0.360510</td>\n", " <td>0.086199</td>\n", " <td>0.470607</td>\n", " <td>-0.515990</td>\n", " <td>-0.162247</td>\n", " <td>1.003075</td>\n", " <td>0.342987</td>\n", " <td>...</td>\n", " <td>-0.080755</td>\n", " <td>NaN</td>\n", " <td>0.174904</td>\n", " <td>-0.353412</td>\n", " <td>NaN</td>\n", " <td>-0.013793</td>\n", " <td>-0.253829</td>\n", " <td>-0.117960</td>\n", " <td>NaN</td>\n", " <td>1.094966</td>\n", " </tr>\n", " <tr>\n", " <th>CPT000814</th>\n", " <td>-0.518995</td>\n", " <td>0.262582</td>\n", " <td>0.277980</td>\n", " <td>0.137505</td>\n", " <td>0.600041</td>\n", " <td>-1.041230</td>\n", " <td>0.513974</td>\n", " <td>-0.012011</td>\n", " <td>NaN</td>\n", " <td>-0.411714</td>\n", " <td>...</td>\n", " <td>-2.011008</td>\n", " <td>NaN</td>\n", " <td>0.035445</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>-1.385942</td>\n", " <td>0.620827</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>CPT001846</th>\n", " <td>0.652513</td>\n", " <td>-0.165268</td>\n", " <td>0.809801</td>\n", " <td>-1.108263</td>\n", " <td>0.557576</td>\n", " <td>0.007854</td>\n", " <td>0.213734</td>\n", " <td>-0.721577</td>\n", " 
<td>-2.695651</td>\n", " <td>0.296581</td>\n", " <td>...</td>\n", " <td>-0.057923</td>\n", " <td>NaN</td>\n", " <td>0.252335</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>0.040722</td>\n", " <td>-0.006118</td>\n", " <td>-0.251360</td>\n", " <td>0.443203</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>125 rows × 12922 columns</p>\n", "</div>" ], "text/plain": [ "Name ARF5 M6PR ESRRA \\\n", "Database_ID ENSP00000000233.5 ENSP00000000412.3 ENSP00000000442.6 \n", "Patient_ID \n", "01BR001 0.012367 -0.945999 NaN \n", "01BR008 -0.514386 0.462307 0.230124 \n", "01BR009 -0.210782 -0.085055 0.380296 \n", "01BR010 0.105457 0.351335 -0.322798 \n", "01BR015 -0.509298 -0.874164 NaN \n", "... ... ... ... \n", "21BR010 0.528298 -0.127929 -0.497360 \n", "22BR005 -0.549542 0.134236 NaN \n", "22BR006 0.336092 0.125742 NaN \n", "CPT000814 -0.518995 0.262582 0.277980 \n", "CPT001846 0.652513 -0.165268 0.809801 \n", "\n", "Name FKBP4 NDUFAF7 FUCA2 \\\n", "Database_ID ENSP00000001008.4 ENSP00000002125.4 ENSP00000002165.5 \n", "Patient_ID \n", "01BR001 -0.478170 1.135840 -0.512706 \n", "01BR008 -0.555968 0.491366 -0.656034 \n", "01BR009 -0.389491 1.255391 -0.608007 \n", "01BR010 -0.821610 0.241406 -0.500140 \n", "01BR015 -0.113804 -0.131347 -0.412813 \n", "... ... ... ... \n", "21BR010 -0.151022 0.082288 0.447267 \n", "22BR005 0.580773 -0.080663 -0.056509 \n", "22BR006 -0.360510 0.086199 0.470607 \n", "CPT000814 0.137505 0.600041 -1.041230 \n", "CPT001846 -1.108263 0.557576 0.007854 \n", "\n", "Name DBNDD1 SEMA3F CFTR \\\n", "Database_ID ENSP00000002501.6 ENSP00000002829.3 ENSP00000003084.6 \n", "Patient_ID \n", "01BR001 0.750335 -0.274824 NaN \n", "01BR008 -1.220890 -0.369282 -1.036441 \n", "01BR009 -0.231318 0.092870 -1.505195 \n", "01BR010 -0.137824 0.113791 NaN \n", "01BR015 0.262210 0.042333 NaN \n", "... ... ... ... 
\n", "21BR010 0.151024 0.220194 0.516739 \n", "22BR005 -0.148632 0.260986 NaN \n", "22BR006 -0.515990 -0.162247 1.003075 \n", "CPT000814 0.513974 -0.012011 NaN \n", "CPT001846 0.213734 -0.721577 -2.695651 \n", "\n", "Name CYP51A1 ... DDHD1 WIZ \\\n", "Database_ID ENSP00000003100.8 ... ENSP00000500986.2 ENSP00000500993.1 \n", "Patient_ID ... \n", "01BR001 -0.278244 ... -0.649127 -0.580869 \n", "01BR008 -0.059327 ... 0.632221 NaN \n", "01BR009 0.206595 ... 0.450818 NaN \n", "01BR010 0.498314 ... -0.423470 NaN \n", "01BR015 -0.657666 ... 0.406016 -0.493869 \n", "... ... ... ... ... \n", "21BR010 -0.230357 ... -0.172151 0.636608 \n", "22BR005 -0.348578 ... 0.791937 NaN \n", "22BR006 0.342987 ... -0.080755 NaN \n", "CPT000814 -0.411714 ... -2.011008 NaN \n", "CPT001846 0.296581 ... -0.057923 NaN \n", "\n", "Name GBF1 APOA5 WIZ \\\n", "Database_ID ENSP00000501064.1 ENSP00000501141.1 ENSP00000501256.3 \n", "Patient_ID \n", "01BR001 -0.226667 NaN NaN \n", "01BR008 0.032873 NaN NaN \n", "01BR009 -0.341503 NaN NaN \n", "01BR010 0.360900 NaN NaN \n", "01BR015 -0.192847 NaN NaN \n", "... ... ... ... \n", "21BR010 0.267400 NaN -0.09507 \n", "22BR005 0.171712 NaN NaN \n", "22BR006 0.174904 -0.353412 NaN \n", "CPT000814 0.035445 NaN NaN \n", "CPT001846 0.252335 NaN NaN \n", "\n", "Name LDB1 WIZ RFX7 \\\n", "Database_ID ENSP00000501277.1 ENSP00000501300.1 ENSP00000501317.1 \n", "Patient_ID \n", "01BR001 -0.676185 -0.068202 -0.078207 \n", "01BR008 -0.015459 0.227424 0.325643 \n", "01BR009 -0.220239 0.125092 0.365397 \n", "01BR010 -0.451556 -0.098897 0.208643 \n", "01BR015 0.083639 0.966976 -0.012664 \n", "... ... ... ... 
\n", "21BR010 -0.017522 -0.220463 -0.067717 \n", "22BR005 0.083980 -0.200083 0.155198 \n", "22BR006 -0.013793 -0.253829 -0.117960 \n", "CPT000814 -1.385942 0.620827 NaN \n", "CPT001846 0.040722 -0.006118 -0.251360 \n", "\n", "Name SWSAP1 SVIL \n", "Database_ID ENSP00000501355.1 ENSP00000501521.1 \n", "Patient_ID \n", "01BR001 -0.328420 NaN \n", "01BR008 -0.606240 NaN \n", "01BR009 -0.167392 NaN \n", "01BR010 -0.729096 0.670307 \n", "01BR015 0.081968 NaN \n", "... ... ... \n", "21BR010 -0.311446 0.602422 \n", "22BR005 NaN 0.456801 \n", "22BR006 NaN 1.094966 \n", "CPT000814 NaN NaN \n", "CPT001846 0.443203 NaN \n", "\n", "[125 rows x 12922 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "br.get_proteomics(source=\"umich\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }