{ "cells": [ { "cell_type": "markdown", "id": "facial-pipeline", "metadata": {}, "source": [ "# Tutorial 6: CPTAC basics review\n", "\n", "\n", "First, we'll import the package." ] }, { "cell_type": "code", "execution_count": 1, "id": "dress-rebate", "metadata": {}, "outputs": [], "source": [ "import cptac" ] }, { "cell_type": "markdown", "id": "bb3d6f6b", "metadata": {}, "source": [ "We can list which cancers we have data for." ] }, { "cell_type": "code", "execution_count": 2, "id": "94d73be3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Datatype
CancerSource
all_cancersharmonized[somatic_mutation, ancestry_prediction]
mssm[clinical]
washu[tumor_purity, hla_typing]
brcabcm[miRNA, phosphoproteomics, CNV, proteomics, tr...
broad[transcriptomics]
umich[proteomics, acetylproteomics, phosphoproteomics]
washu[CNV, cibersort, xcell, transcriptomics]
ccrccbcm[miRNA, phosphoproteomics, CNV, circular_RNA, ...
broad[transcriptomics]
umich[proteomics, phosphoproteomics]
washu[total_miRNA, CNV, mature_miRNA, precursor_miR...
coadbcm[miRNA, phosphoproteomics, CNV, proteomics, tr...
broad[transcriptomics]
umich[proteomics, phosphoproteomics]
washu[CNV, transcriptomics, xcell, somatic_mutation...
gbmbcm[miRNA, phosphoproteomics, CNV, circular_RNA, ...
broad[transcriptomics]
umich[proteomics, acetylproteomics, phosphoproteomics]
washu[total_miRNA, CNV, mature_miRNA, precursor_miR...
hnsccbcm[miRNA, phosphoproteomics, circular_RNA, prote...
broad[transcriptomics]
umich[proteomics, phosphoproteomics]
washu[total_miRNA, CNV, mature_miRNA, precursor_miR...
lsccbcm[miRNA, phosphoproteomics, CNV, circular_RNA, ...
broad[transcriptomics]
umich[proteomics, acetylproteomics, phosphoproteomics]
washu[total_miRNA, CNV, mature_miRNA, precursor_miR...
luadbcm[miRNA, phosphoproteomics, CNV, circular_RNA, ...
broad[transcriptomics]
umich[proteomics, acetylproteomics, phosphoproteomics]
washu[total_miRNA, CNV, mature_miRNA, precursor_miR...
ovbcm[CNV, proteomics, transcriptomics, phosphoprot...
broad[transcriptomics]
umich[proteomics]
washu[CNV, transcriptomics, xcell, somatic_mutation...
pdacbcm[miRNA, phosphoproteomics, CNV, circular_RNA, ...
broad[transcriptomics]
umich[proteomics, phosphoproteomics]
washu[total_miRNA, CNV, mature_miRNA, precursor_miR...
ucecbcm[miRNA, phosphoproteomics, CNV, circular_RNA, ...
broad[transcriptomics]
umich[proteomics, acetylproteomics, phosphoproteomics]
washu[total_miRNA, CNV, mature_miRNA, precursor_miR...
\n", "
" ], "text/plain": [ " Datatype\n", "Cancer Source \n", "all_cancers harmonized [somatic_mutation, ancestry_prediction]\n", " mssm [clinical]\n", " washu [tumor_purity, hla_typing]\n", "brca bcm [miRNA, phosphoproteomics, CNV, proteomics, tr...\n", " broad [transcriptomics]\n", " umich [proteomics, acetylproteomics, phosphoproteomics]\n", " washu [CNV, cibersort, xcell, transcriptomics]\n", "ccrcc bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "coad bcm [miRNA, phosphoproteomics, CNV, proteomics, tr...\n", " broad [transcriptomics]\n", " umich [proteomics, phosphoproteomics]\n", " washu [CNV, transcriptomics, xcell, somatic_mutation...\n", "gbm bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, acetylproteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "hnscc bcm [miRNA, phosphoproteomics, circular_RNA, prote...\n", " broad [transcriptomics]\n", " umich [proteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "lscc bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, acetylproteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "luad bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, acetylproteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR...\n", "ov bcm [CNV, proteomics, transcriptomics, phosphoprot...\n", " broad [transcriptomics]\n", " umich [proteomics]\n", " washu [CNV, transcriptomics, xcell, somatic_mutation...\n", "pdac bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, 
mature_miRNA, precursor_miR...\n", "ucec bcm [miRNA, phosphoproteomics, CNV, circular_RNA, ...\n", " broad [transcriptomics]\n", " umich [proteomics, acetylproteomics, phosphoproteomics]\n", " washu [total_miRNA, CNV, mature_miRNA, precursor_miR..." ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cptac.get_cancer_options()" ] }, { "cell_type": "markdown", "id": "metropolitan-stranger", "metadata": {}, "source": [ "## Load the BRCA dataset" ] }, { "cell_type": "code", "execution_count": 3, "id": "compliant-rachel", "metadata": {}, "outputs": [], "source": [ "br = cptac.Brca()" ] }, { "cell_type": "markdown", "id": "05f6c2ae", "metadata": {}, "source": [ "We can list which data types are available from which sources." ] }, { "cell_type": "code", "execution_count": 4, "id": "dce3f8b3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Data typeAvailable sources
0CNV[bcm, washu]
1miRNA[bcm]
2phosphoproteomics[bcm, umich]
3proteomics[bcm, umich]
4transcriptomics[bcm, broad, washu]
5ancestry_prediction[harmonized]
6somatic_mutation[harmonized, washu]
7clinical[mssm]
8follow-up[mssm]
9medical_history[mssm]
10acetylproteomics[umich]
11cibersort[washu]
12hla_typing[washu]
13tumor_purity[washu]
14xcell[washu]
\n", "
" ], "text/plain": [ " Data type Available sources\n", "0 CNV [bcm, washu]\n", "1 miRNA [bcm]\n", "2 phosphoproteomics [bcm, umich]\n", "3 proteomics [bcm, umich]\n", "4 transcriptomics [bcm, broad, washu]\n", "5 ancestry_prediction [harmonized]\n", "6 somatic_mutation [harmonized, washu]\n", "7 clinical [mssm]\n", "8 follow-up [mssm]\n", "9 medical_history [mssm]\n", "10 acetylproteomics [umich]\n", "11 cibersort [washu]\n", "12 hla_typing [washu]\n", "13 tumor_purity [washu]\n", "14 xcell [washu]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "br.list_data_sources()" ] }, { "cell_type": "markdown", "id": "distributed-impossible", "metadata": {}, "source": [ "## Download\n", "\n", "Each file will be automatically downloaded when requested, but authentication through your Box account is required to download pancan data.\n", "\n", "Downloading files on a remote computer that doesn't have a web browser for logging into Box requires a different procedure that is not covered in this tutorial; refer to the cptac documentation for instructions." ] }, { "cell_type": "markdown", "id": "ebaaa91f", "metadata": {}, "source": [ "Let's get some data tables." ] }, { "cell_type": "code", "execution_count": 5, "id": "gorgeous-extraction", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nametumor_codediscovery_studytype_of_analyzed_samplesconfirmatory_studytype_of_analyzed_samplesagesexraceethnicityethnicity_race_ancestry_identified...additional_treatment_pharmaceutical_therapy_for_new_tumoradditional_treatment_immuno_for_new_tumornumber_of_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor_event_loco-regionalnumber_of_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor_event_metastasisRecurrence-free survival, daysRecurrence-free survival from collection, daysRecurrence status (1, yes; 0, no)Overall survival, daysOverall survival from collection, daysSurvival status (1, dead; 0, alive)
Patient_ID
01BR001BRYesNaNNaNNaN55FemaleBlack or African AmericanNot Hispanic or LatinoNaN...NaNNaNNaNNaNNaNNaN0421.0NaN0.0
01BR008BRYesNaNNaNNaN48FemaleBlack or African AmericanNot Hispanic or LatinoNaN...NaNNaNNaNNaNNaNNaN0NaNNaNNaN
01BR009BRYesNaNNaNNaN64FemaleBlack or African AmericanNot Hispanic or LatinoNaN...NaNNaNNaNNaNNaNNaN0NaNNaNNaN
01BR010BRYesNaNNaNNaN65FemaleBlack or African AmericanNot Hispanic or LatinoNaN...NaNNaNNaNNaNNaNNaN0NaNNaNNaN
01BR015BRYesNaNNaNNaN35FemaleWhiteNot Hispanic or LatinoNaN...NaNNaNNaNNaNNaNNaN0347.0NaN0.0
..................................................................
21BR010BRYesNaNNaNNaN71FemaleWhiteHispanic or LatinoNaN...NaNNaNNaNNaNNaNNaN0327.0NaN0.0
22BR003BRYesNaNNaNNaN30FemaleWhiteNot Hispanic or LatinoNaN...NaNNaNNaNNaNNaNNaN0NaNNaNNaN
22BR005BRYesNaNNaNNaN46FemaleWhiteNot Hispanic or LatinoNaN...NaNNaNNaNNaNNaNNaN0348.0NaN0.0
22BR006BRYesNaNNaNNaN55FemaleBlack or African AmericanNot Hispanic or LatinoNaN...NaNNaNNaNNaNNaNNaN0282.0NaN1.0
604BRYesNaNNaNNaN41FemaleBlack or African AmericanUnknownNaN...NaNNaNNaNNaNNaNNaN0NaNNaNNaN
\n", "

134 rows × 124 columns

\n", "
" ], "text/plain": [ "Name tumor_code discovery_study type_of_analyzed_samples \\\n", "Patient_ID \n", "01BR001 BR Yes NaN \n", "01BR008 BR Yes NaN \n", "01BR009 BR Yes NaN \n", "01BR010 BR Yes NaN \n", "01BR015 BR Yes NaN \n", "... ... ... ... \n", "21BR010 BR Yes NaN \n", "22BR003 BR Yes NaN \n", "22BR005 BR Yes NaN \n", "22BR006 BR Yes NaN \n", "604 BR Yes NaN \n", "\n", "Name confirmatory_study type_of_analyzed_samples age sex \\\n", "Patient_ID \n", "01BR001 NaN NaN 55 Female \n", "01BR008 NaN NaN 48 Female \n", "01BR009 NaN NaN 64 Female \n", "01BR010 NaN NaN 65 Female \n", "01BR015 NaN NaN 35 Female \n", "... ... ... .. ... \n", "21BR010 NaN NaN 71 Female \n", "22BR003 NaN NaN 30 Female \n", "22BR005 NaN NaN 46 Female \n", "22BR006 NaN NaN 55 Female \n", "604 NaN NaN 41 Female \n", "\n", "Name race ethnicity \\\n", "Patient_ID \n", "01BR001 Black or African American Not Hispanic or Latino \n", "01BR008 Black or African American Not Hispanic or Latino \n", "01BR009 Black or African American Not Hispanic or Latino \n", "01BR010 Black or African American Not Hispanic or Latino \n", "01BR015 White Not Hispanic or Latino \n", "... ... ... \n", "21BR010 White Hispanic or Latino \n", "22BR003 White Not Hispanic or Latino \n", "22BR005 White Not Hispanic or Latino \n", "22BR006 Black or African American Not Hispanic or Latino \n", "604 Black or African American Unknown \n", "\n", "Name ethnicity_race_ancestry_identified ... \\\n", "Patient_ID ... \n", "01BR001 NaN ... \n", "01BR008 NaN ... \n", "01BR009 NaN ... \n", "01BR010 NaN ... \n", "01BR015 NaN ... \n", "... ... ... \n", "21BR010 NaN ... \n", "22BR003 NaN ... \n", "22BR005 NaN ... \n", "22BR006 NaN ... \n", "604 NaN ... \n", "\n", "Name additional_treatment_pharmaceutical_therapy_for_new_tumor \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... 
\n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name additional_treatment_immuno_for_new_tumor \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name number_of_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor_event_loco-regional \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name number_of_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor_event_metastasis \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name Recurrence-free survival, days \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name Recurrence-free survival from collection, days \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name Recurrence status (1, yes; 0, no) Overall survival, days \\\n", "Patient_ID \n", "01BR001 0 421.0 \n", "01BR008 0 NaN \n", "01BR009 0 NaN \n", "01BR010 0 NaN \n", "01BR015 0 347.0 \n", "... ... ... 
\n", "21BR010 0 327.0 \n", "22BR003 0 NaN \n", "22BR005 0 348.0 \n", "22BR006 0 282.0 \n", "604 0 NaN \n", "\n", "Name Overall survival from collection, days \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 NaN \n", "... ... \n", "21BR010 NaN \n", "22BR003 NaN \n", "22BR005 NaN \n", "22BR006 NaN \n", "604 NaN \n", "\n", "Name Survival status (1, dead; 0, alive) \n", "Patient_ID \n", "01BR001 0.0 \n", "01BR008 NaN \n", "01BR009 NaN \n", "01BR010 NaN \n", "01BR015 0.0 \n", "... ... \n", "21BR010 0.0 \n", "22BR003 NaN \n", "22BR005 0.0 \n", "22BR006 1.0 \n", "604 NaN \n", "\n", "[134 rows x 124 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "br.get_clinical(source=\"mssm\")\n", "# Note: the `source=` keyword above is optional; calling the function as br.get_clinical('mssm') works just as well" ] }, { "cell_type": "code", "execution_count": 6, "id": "accredited-hardwood", "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "cptac warning: Your version of cptac (1.5.1) is out-of-date. Latest is 1.5.0. Please run 'pip install --upgrade cptac' to update it. (C:\\Users\\sabme\\anaconda3\\lib\\threading.py, line 910)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameGeneMutationLocationEntrez_Gene_IdNCBI_BuildChromosomeStart_PositionEnd_PositionStrandVariant_Type...HGNC_UniProt_ID(supplied_by_UniProt)HGNC_Ensembl_ID(supplied_by_Ensembl)HGNC_UCSC_ID(supplied_by_UCSC)Oreganno_BuildSimple_Uniprot_alt_uniprot_accessionsdbSNP_TOPMEDHGNC_Entrez_Gene_ID(supplied_by_NCBI)COHORTgetzwashu
Patient_ID
01BR001CCDC136IntronNaN64753.0hg38chr7128815741128815741+SNP...Q96JN2ENSG00000128596uc003vnv.3NaNA4D1K1|A7MCY7|A8MYA7|Q6ZVK7|Q9H8M3|Q9UFE1NaN64753.0BRCATrueTrue
01BR001MYBPC1Splice_SiteNaN4604.0hg38chr12101661264101661264+SNP...Q00872ENSG00000196091uc001tih.4NaNB4DKR5|B7Z8G8|B7ZL02|B7ZL09|B7ZL10|E7ESM5|E7EW...NaN4604.0BRCATrueTrue
01BR001KRT77Silentp.G516G374454.0hg38chr125269135452691354+SNP...Q7Z794ENSG00000189182uc001saw.4NaNQ7RTS8NaN374454.0BRCATrueNaN
01BR001TENM4Missense_Mutationp.E19K26011.0hg38chr117906989079069890+SNP...Q6N022ENSG00000149256uc001ozl.5hg38A6ND26|Q7Z3C7|Q96MS6|Q9P2P4|Q9Y4S2NaN26011.0BRCATrueTrue
01BR001PPFIA1Missense_Mutationp.H795L8500.0hg38chr117035570770355707+SNP...Q13136ENSG00000131626uc001opo.4NaNA6NLE3|Q13135|Q14567|Q8N4I2NaN8500.0BRCATrueTrue
..................................................................
604EXPH5Silentp.Q1690Q23086.0hg38chr11108510437108510437+SNP...Q8NEV8ENSG00000110723uc001pkk.3NaNQ2KHM1|Q9Y4D60.99998407237512742,0.0000159276248725723086.0BRCATrueTrue
604IGSF9BMissense_Mutationp.V493M22997.0hg38chr11133931026133931026+SNP...Q9UPX0ENSG00000080854uc031qfh.2hg38G5EA26NaN22997.0BRCATrueTrue
604ANO2IntronNaN57101.0hg38chr1258277505827750+SNP...Q9NQ90ENSG00000047617uc058kbl.1NaNC4N787|Q9H8470.99998407237512742,.,0.0000159276248725757101.0BRCATrueNaN
604HFM1Silentp.G133G164045.0hg38chr19139418891394188+SNP...A2PYH4ENSG00000162669uc001doa.4NaNB1B0B6|Q8N9Q0NaN164045.0BRCATrueTrue
604SLC44A2Frame_Shift_Delp.E435fs57153.0hg38chr191063638810636407+DEL...Q8IWA5ENSG00000129353uc002mpf.4NaNB2RBB1|B3KNH3|B4DFJ0|F2Q9D7|Q658V1|Q658Z2|Q6PJ...NaN57153.0BRCATrueTrue
\n", "

29017 rows × 131 columns

\n", "
" ], "text/plain": [ "Name Gene Mutation Location Entrez_Gene_Id NCBI_Build \\\n", "Patient_ID \n", "01BR001 CCDC136 Intron NaN 64753.0 hg38 \n", "01BR001 MYBPC1 Splice_Site NaN 4604.0 hg38 \n", "01BR001 KRT77 Silent p.G516G 374454.0 hg38 \n", "01BR001 TENM4 Missense_Mutation p.E19K 26011.0 hg38 \n", "01BR001 PPFIA1 Missense_Mutation p.H795L 8500.0 hg38 \n", "... ... ... ... ... ... \n", "604 EXPH5 Silent p.Q1690Q 23086.0 hg38 \n", "604 IGSF9B Missense_Mutation p.V493M 22997.0 hg38 \n", "604 ANO2 Intron NaN 57101.0 hg38 \n", "604 HFM1 Silent p.G133G 164045.0 hg38 \n", "604 SLC44A2 Frame_Shift_Del p.E435fs 57153.0 hg38 \n", "\n", "Name Chromosome Start_Position End_Position Strand Variant_Type ... \\\n", "Patient_ID ... \n", "01BR001 chr7 128815741 128815741 + SNP ... \n", "01BR001 chr12 101661264 101661264 + SNP ... \n", "01BR001 chr12 52691354 52691354 + SNP ... \n", "01BR001 chr11 79069890 79069890 + SNP ... \n", "01BR001 chr11 70355707 70355707 + SNP ... \n", "... ... ... ... ... ... ... \n", "604 chr11 108510437 108510437 + SNP ... \n", "604 chr11 133931026 133931026 + SNP ... \n", "604 chr12 5827750 5827750 + SNP ... \n", "604 chr1 91394188 91394188 + SNP ... \n", "604 chr19 10636388 10636407 + DEL ... \n", "\n", "Name HGNC_UniProt_ID(supplied_by_UniProt) \\\n", "Patient_ID \n", "01BR001 Q96JN2 \n", "01BR001 Q00872 \n", "01BR001 Q7Z794 \n", "01BR001 Q6N022 \n", "01BR001 Q13136 \n", "... ... \n", "604 Q8NEV8 \n", "604 Q9UPX0 \n", "604 Q9NQ90 \n", "604 A2PYH4 \n", "604 Q8IWA5 \n", "\n", "Name HGNC_Ensembl_ID(supplied_by_Ensembl) \\\n", "Patient_ID \n", "01BR001 ENSG00000128596 \n", "01BR001 ENSG00000196091 \n", "01BR001 ENSG00000189182 \n", "01BR001 ENSG00000149256 \n", "01BR001 ENSG00000131626 \n", "... ... 
\n", "604 ENSG00000110723 \n", "604 ENSG00000080854 \n", "604 ENSG00000047617 \n", "604 ENSG00000162669 \n", "604 ENSG00000129353 \n", "\n", "Name HGNC_UCSC_ID(supplied_by_UCSC) Oreganno_Build \\\n", "Patient_ID \n", "01BR001 uc003vnv.3 NaN \n", "01BR001 uc001tih.4 NaN \n", "01BR001 uc001saw.4 NaN \n", "01BR001 uc001ozl.5 hg38 \n", "01BR001 uc001opo.4 NaN \n", "... ... ... \n", "604 uc001pkk.3 NaN \n", "604 uc031qfh.2 hg38 \n", "604 uc058kbl.1 NaN \n", "604 uc001doa.4 NaN \n", "604 uc002mpf.4 NaN \n", "\n", "Name Simple_Uniprot_alt_uniprot_accessions \\\n", "Patient_ID \n", "01BR001 A4D1K1|A7MCY7|A8MYA7|Q6ZVK7|Q9H8M3|Q9UFE1 \n", "01BR001 B4DKR5|B7Z8G8|B7ZL02|B7ZL09|B7ZL10|E7ESM5|E7EW... \n", "01BR001 Q7RTS8 \n", "01BR001 A6ND26|Q7Z3C7|Q96MS6|Q9P2P4|Q9Y4S2 \n", "01BR001 A6NLE3|Q13135|Q14567|Q8N4I2 \n", "... ... \n", "604 Q2KHM1|Q9Y4D6 \n", "604 G5EA26 \n", "604 C4N787|Q9H847 \n", "604 B1B0B6|Q8N9Q0 \n", "604 B2RBB1|B3KNH3|B4DFJ0|F2Q9D7|Q658V1|Q658Z2|Q6PJ... \n", "\n", "Name dbSNP_TOPMED \\\n", "Patient_ID \n", "01BR001 NaN \n", "01BR001 NaN \n", "01BR001 NaN \n", "01BR001 NaN \n", "01BR001 NaN \n", "... ... \n", "604 0.99998407237512742,0.00001592762487257 \n", "604 NaN \n", "604 0.99998407237512742,.,0.00001592762487257 \n", "604 NaN \n", "604 NaN \n", "\n", "Name HGNC_Entrez_Gene_ID(supplied_by_NCBI) COHORT getz washu \n", "Patient_ID \n", "01BR001 64753.0 BRCA True True \n", "01BR001 4604.0 BRCA True True \n", "01BR001 374454.0 BRCA True NaN \n", "01BR001 26011.0 BRCA True True \n", "01BR001 8500.0 BRCA True True \n", "... ... ... ... ... 
\n", "604 23086.0 BRCA True True \n", "604 22997.0 BRCA True True \n", "604 57101.0 BRCA True NaN \n", "604 164045.0 BRCA True True \n", "604 57153.0 BRCA True True \n", "\n", "[29017 rows x 131 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "br.get_somatic_mutation('harmonized')" ] }, { "cell_type": "code", "execution_count": 7, "id": "clear-cliff", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameARF5M6PRESRRAFKBP4NDUFAF7FUCA2DBNDD1SEMA3FCFTRCYP51A1...DDHD1WIZGBF1APOA5WIZLDB1WIZRFX7SWSAP1SVIL
Database_IDENSP00000000233.5ENSP00000000412.3ENSP00000000442.6ENSP00000001008.4ENSP00000002125.4ENSP00000002165.5ENSP00000002501.6ENSP00000002829.3ENSP00000003084.6ENSP00000003100.8...ENSP00000500986.2ENSP00000500993.1ENSP00000501064.1ENSP00000501141.1ENSP00000501256.3ENSP00000501277.1ENSP00000501300.1ENSP00000501317.1ENSP00000501355.1ENSP00000501521.1
Patient_ID
01BR0010.012367-0.945999NaN-0.4781701.135840-0.5127060.750335-0.274824NaN-0.278244...-0.649127-0.580869-0.226667NaNNaN-0.676185-0.068202-0.078207-0.328420NaN
01BR008-0.5143860.4623070.230124-0.5559680.491366-0.656034-1.220890-0.369282-1.036441-0.059327...0.632221NaN0.032873NaNNaN-0.0154590.2274240.325643-0.606240NaN
01BR009-0.210782-0.0850550.380296-0.3894911.255391-0.608007-0.2313180.092870-1.5051950.206595...0.450818NaN-0.341503NaNNaN-0.2202390.1250920.365397-0.167392NaN
01BR0100.1054570.351335-0.322798-0.8216100.241406-0.500140-0.1378240.113791NaN0.498314...-0.423470NaN0.360900NaNNaN-0.451556-0.0988970.208643-0.7290960.670307
01BR015-0.509298-0.874164NaN-0.113804-0.131347-0.4128130.2622100.042333NaN-0.657666...0.406016-0.493869-0.192847NaNNaN0.0836390.966976-0.0126640.081968NaN
..................................................................
21BR0100.528298-0.127929-0.497360-0.1510220.0822880.4472670.1510240.2201940.516739-0.230357...-0.1721510.6366080.267400NaN-0.09507-0.017522-0.220463-0.067717-0.3114460.602422
22BR005-0.5495420.134236NaN0.580773-0.080663-0.056509-0.1486320.260986NaN-0.348578...0.791937NaN0.171712NaNNaN0.083980-0.2000830.155198NaN0.456801
22BR0060.3360920.125742NaN-0.3605100.0861990.470607-0.515990-0.1622471.0030750.342987...-0.080755NaN0.174904-0.353412NaN-0.013793-0.253829-0.117960NaN1.094966
CPT000814-0.5189950.2625820.2779800.1375050.600041-1.0412300.513974-0.012011NaN-0.411714...-2.011008NaN0.035445NaNNaN-1.3859420.620827NaNNaNNaN
CPT0018460.652513-0.1652680.809801-1.1082630.5575760.0078540.213734-0.721577-2.6956510.296581...-0.057923NaN0.252335NaNNaN0.040722-0.006118-0.2513600.443203NaN
\n", "

125 rows × 12922 columns

\n", "
" ], "text/plain": [ "Name ARF5 M6PR ESRRA \\\n", "Database_ID ENSP00000000233.5 ENSP00000000412.3 ENSP00000000442.6 \n", "Patient_ID \n", "01BR001 0.012367 -0.945999 NaN \n", "01BR008 -0.514386 0.462307 0.230124 \n", "01BR009 -0.210782 -0.085055 0.380296 \n", "01BR010 0.105457 0.351335 -0.322798 \n", "01BR015 -0.509298 -0.874164 NaN \n", "... ... ... ... \n", "21BR010 0.528298 -0.127929 -0.497360 \n", "22BR005 -0.549542 0.134236 NaN \n", "22BR006 0.336092 0.125742 NaN \n", "CPT000814 -0.518995 0.262582 0.277980 \n", "CPT001846 0.652513 -0.165268 0.809801 \n", "\n", "Name FKBP4 NDUFAF7 FUCA2 \\\n", "Database_ID ENSP00000001008.4 ENSP00000002125.4 ENSP00000002165.5 \n", "Patient_ID \n", "01BR001 -0.478170 1.135840 -0.512706 \n", "01BR008 -0.555968 0.491366 -0.656034 \n", "01BR009 -0.389491 1.255391 -0.608007 \n", "01BR010 -0.821610 0.241406 -0.500140 \n", "01BR015 -0.113804 -0.131347 -0.412813 \n", "... ... ... ... \n", "21BR010 -0.151022 0.082288 0.447267 \n", "22BR005 0.580773 -0.080663 -0.056509 \n", "22BR006 -0.360510 0.086199 0.470607 \n", "CPT000814 0.137505 0.600041 -1.041230 \n", "CPT001846 -1.108263 0.557576 0.007854 \n", "\n", "Name DBNDD1 SEMA3F CFTR \\\n", "Database_ID ENSP00000002501.6 ENSP00000002829.3 ENSP00000003084.6 \n", "Patient_ID \n", "01BR001 0.750335 -0.274824 NaN \n", "01BR008 -1.220890 -0.369282 -1.036441 \n", "01BR009 -0.231318 0.092870 -1.505195 \n", "01BR010 -0.137824 0.113791 NaN \n", "01BR015 0.262210 0.042333 NaN \n", "... ... ... ... \n", "21BR010 0.151024 0.220194 0.516739 \n", "22BR005 -0.148632 0.260986 NaN \n", "22BR006 -0.515990 -0.162247 1.003075 \n", "CPT000814 0.513974 -0.012011 NaN \n", "CPT001846 0.213734 -0.721577 -2.695651 \n", "\n", "Name CYP51A1 ... DDHD1 WIZ \\\n", "Database_ID ENSP00000003100.8 ... ENSP00000500986.2 ENSP00000500993.1 \n", "Patient_ID ... \n", "01BR001 -0.278244 ... -0.649127 -0.580869 \n", "01BR008 -0.059327 ... 0.632221 NaN \n", "01BR009 0.206595 ... 0.450818 NaN \n", "01BR010 0.498314 ... 
-0.423470 NaN \n", "01BR015 -0.657666 ... 0.406016 -0.493869 \n", "... ... ... ... ... \n", "21BR010 -0.230357 ... -0.172151 0.636608 \n", "22BR005 -0.348578 ... 0.791937 NaN \n", "22BR006 0.342987 ... -0.080755 NaN \n", "CPT000814 -0.411714 ... -2.011008 NaN \n", "CPT001846 0.296581 ... -0.057923 NaN \n", "\n", "Name GBF1 APOA5 WIZ \\\n", "Database_ID ENSP00000501064.1 ENSP00000501141.1 ENSP00000501256.3 \n", "Patient_ID \n", "01BR001 -0.226667 NaN NaN \n", "01BR008 0.032873 NaN NaN \n", "01BR009 -0.341503 NaN NaN \n", "01BR010 0.360900 NaN NaN \n", "01BR015 -0.192847 NaN NaN \n", "... ... ... ... \n", "21BR010 0.267400 NaN -0.09507 \n", "22BR005 0.171712 NaN NaN \n", "22BR006 0.174904 -0.353412 NaN \n", "CPT000814 0.035445 NaN NaN \n", "CPT001846 0.252335 NaN NaN \n", "\n", "Name LDB1 WIZ RFX7 \\\n", "Database_ID ENSP00000501277.1 ENSP00000501300.1 ENSP00000501317.1 \n", "Patient_ID \n", "01BR001 -0.676185 -0.068202 -0.078207 \n", "01BR008 -0.015459 0.227424 0.325643 \n", "01BR009 -0.220239 0.125092 0.365397 \n", "01BR010 -0.451556 -0.098897 0.208643 \n", "01BR015 0.083639 0.966976 -0.012664 \n", "... ... ... ... \n", "21BR010 -0.017522 -0.220463 -0.067717 \n", "22BR005 0.083980 -0.200083 0.155198 \n", "22BR006 -0.013793 -0.253829 -0.117960 \n", "CPT000814 -1.385942 0.620827 NaN \n", "CPT001846 0.040722 -0.006118 -0.251360 \n", "\n", "Name SWSAP1 SVIL \n", "Database_ID ENSP00000501355.1 ENSP00000501521.1 \n", "Patient_ID \n", "01BR001 -0.328420 NaN \n", "01BR008 -0.606240 NaN \n", "01BR009 -0.167392 NaN \n", "01BR010 -0.729096 0.670307 \n", "01BR015 0.081968 NaN \n", "... ... ... 
\n", "21BR010 -0.311446 0.602422 \n", "22BR005 NaN 0.456801 \n", "22BR006 NaN 1.094966 \n", "CPT000814 NaN NaN \n", "CPT001846 0.443203 NaN \n", "\n", "[125 rows x 12922 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "br.get_proteomics(source=\"umich\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }