# Tutorial 6: CPTAC basics review


First, we'll import the package.

In [1]:
import cptac

We can list the cancer types for which data are available, along with the sources and datatypes offered for each.

In [2]:
# Display a table of cancer types, the sources that provide data for each,
# and the datatypes available from each source.
cptac.get_cancer_options()

Unnamed: 0_level_0,Unnamed: 1_level_0,Datatype
Cancer,Source,Unnamed: 2_level_1
all_cancers,harmonized,"[somatic_mutation, ancestry_prediction]"
all_cancers,mssm,[clinical]
all_cancers,washu,"[tumor_purity, hla_typing]"
brca,bcm,"[miRNA, phosphoproteomics, CNV, proteomics, tr..."
brca,broad,[transcriptomics]
brca,umich,"[proteomics, acetylproteomics, phosphoproteomics]"
brca,washu,"[CNV, cibersort, xcell, transcriptomics]"
ccrcc,bcm,"[miRNA, phosphoproteomics, CNV, circular_RNA, ..."
ccrcc,broad,[transcriptomics]
ccrcc,umich,"[proteomics, phosphoproteomics]"


## Load the BRCA dataset

In [3]:
# Create the BRCA dataset object; the cells below query it through `br`.
br = cptac.Brca()

We can list which data types are available from which sources.

In [4]:
# Display one row per data type, with the list of sources that provide it.
br.list_data_sources()

Unnamed: 0,Data type,Available sources
0,CNV,"[bcm, washu]"
1,miRNA,[bcm]
2,phosphoproteomics,"[bcm, umich]"
3,proteomics,"[bcm, umich]"
4,transcriptomics,"[bcm, broad, washu]"
5,ancestry_prediction,[harmonized]
6,somatic_mutation,"[harmonized, washu]"
7,clinical,[mssm]
8,follow-up,[mssm]
9,medical_history,[mssm]


## Download

Each file will be automatically downloaded when requested, but authentication through your Box account is required to download pancan data.

See the end of this tutorial for how to download files on a remote computer that doesn't have a web browser for logging into Box.

Let's get some data tables.

In [5]:
br.get_clinical(source="mssm")
# Note: spelling out the `source=` keyword is optional; calling the function
# as br.get_clinical('mssm') works just as well.

Name,tumor_code,discovery_study,type_of_analyzed_samples,confirmatory_study,type_of_analyzed_samples,age,sex,race,ethnicity,ethnicity_race_ancestry_identified,...,additional_treatment_pharmaceutical_therapy_for_new_tumor,additional_treatment_immuno_for_new_tumor,number_of_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor_event_loco-regional,number_of_days_from_date_of_initial_pathologic_diagnosis_to_date_of_additional_surgery_for_new_tumor_event_metastasis,"Recurrence-free survival, days","Recurrence-free survival from collection, days","Recurrence status (1, yes; 0, no)","Overall survival, days","Overall survival from collection, days","Survival status (1, dead; 0, alive)"
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01BR001,BR,Yes,,,,55,Female,Black or African American,Not Hispanic or Latino,,...,,,,,,,0,421.0,,0.0
01BR008,BR,Yes,,,,48,Female,Black or African American,Not Hispanic or Latino,,...,,,,,,,0,,,
01BR009,BR,Yes,,,,64,Female,Black or African American,Not Hispanic or Latino,,...,,,,,,,0,,,
01BR010,BR,Yes,,,,65,Female,Black or African American,Not Hispanic or Latino,,...,,,,,,,0,,,
01BR015,BR,Yes,,,,35,Female,White,Not Hispanic or Latino,,...,,,,,,,0,347.0,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21BR010,BR,Yes,,,,71,Female,White,Hispanic or Latino,,...,,,,,,,0,327.0,,0.0
22BR003,BR,Yes,,,,30,Female,White,Not Hispanic or Latino,,...,,,,,,,0,,,
22BR005,BR,Yes,,,,46,Female,White,Not Hispanic or Latino,,...,,,,,,,0,348.0,,0.0
22BR006,BR,Yes,,,,55,Female,Black or African American,Not Hispanic or Latino,,...,,,,,,,0,282.0,,1.0


In [6]:
# The source name is passed positionally here rather than by keyword.
# The resulting table has one row per mutation, so the same Patient_ID
# can appear on several rows.
br.get_somatic_mutation('harmonized')



Name,Gene,Mutation,Location,Entrez_Gene_Id,NCBI_Build,Chromosome,Start_Position,End_Position,Strand,Variant_Type,...,HGNC_UniProt_ID(supplied_by_UniProt),HGNC_Ensembl_ID(supplied_by_Ensembl),HGNC_UCSC_ID(supplied_by_UCSC),Oreganno_Build,Simple_Uniprot_alt_uniprot_accessions,dbSNP_TOPMED,HGNC_Entrez_Gene_ID(supplied_by_NCBI),COHORT,getz,washu
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01BR001,CCDC136,Intron,,64753.0,hg38,chr7,128815741,128815741,+,SNP,...,Q96JN2,ENSG00000128596,uc003vnv.3,,A4D1K1|A7MCY7|A8MYA7|Q6ZVK7|Q9H8M3|Q9UFE1,,64753.0,BRCA,True,True
01BR001,MYBPC1,Splice_Site,,4604.0,hg38,chr12,101661264,101661264,+,SNP,...,Q00872,ENSG00000196091,uc001tih.4,,B4DKR5|B7Z8G8|B7ZL02|B7ZL09|B7ZL10|E7ESM5|E7EW...,,4604.0,BRCA,True,True
01BR001,KRT77,Silent,p.G516G,374454.0,hg38,chr12,52691354,52691354,+,SNP,...,Q7Z794,ENSG00000189182,uc001saw.4,,Q7RTS8,,374454.0,BRCA,True,
01BR001,TENM4,Missense_Mutation,p.E19K,26011.0,hg38,chr11,79069890,79069890,+,SNP,...,Q6N022,ENSG00000149256,uc001ozl.5,hg38,A6ND26|Q7Z3C7|Q96MS6|Q9P2P4|Q9Y4S2,,26011.0,BRCA,True,True
01BR001,PPFIA1,Missense_Mutation,p.H795L,8500.0,hg38,chr11,70355707,70355707,+,SNP,...,Q13136,ENSG00000131626,uc001opo.4,,A6NLE3|Q13135|Q14567|Q8N4I2,,8500.0,BRCA,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
604,EXPH5,Silent,p.Q1690Q,23086.0,hg38,chr11,108510437,108510437,+,SNP,...,Q8NEV8,ENSG00000110723,uc001pkk.3,,Q2KHM1|Q9Y4D6,"0.99998407237512742,0.00001592762487257",23086.0,BRCA,True,True
604,IGSF9B,Missense_Mutation,p.V493M,22997.0,hg38,chr11,133931026,133931026,+,SNP,...,Q9UPX0,ENSG00000080854,uc031qfh.2,hg38,G5EA26,,22997.0,BRCA,True,True
604,ANO2,Intron,,57101.0,hg38,chr12,5827750,5827750,+,SNP,...,Q9NQ90,ENSG00000047617,uc058kbl.1,,C4N787|Q9H847,"0.99998407237512742,.,0.00001592762487257",57101.0,BRCA,True,
604,HFM1,Silent,p.G133G,164045.0,hg38,chr1,91394188,91394188,+,SNP,...,A2PYH4,ENSG00000162669,uc001doa.4,,B1B0B6|Q8N9Q0,,164045.0,BRCA,True,True


In [7]:
# The umich proteomics table has two column levels (Name and Database_ID);
# a gene name such as WIZ can appear more than once with different database IDs.
br.get_proteomics(source="umich")

Name,ARF5,M6PR,ESRRA,FKBP4,NDUFAF7,FUCA2,DBNDD1,SEMA3F,CFTR,CYP51A1,...,DDHD1,WIZ,GBF1,APOA5,WIZ,LDB1,WIZ,RFX7,SWSAP1,SVIL
Database_ID,ENSP00000000233.5,ENSP00000000412.3,ENSP00000000442.6,ENSP00000001008.4,ENSP00000002125.4,ENSP00000002165.5,ENSP00000002501.6,ENSP00000002829.3,ENSP00000003084.6,ENSP00000003100.8,...,ENSP00000500986.2,ENSP00000500993.1,ENSP00000501064.1,ENSP00000501141.1,ENSP00000501256.3,ENSP00000501277.1,ENSP00000501300.1,ENSP00000501317.1,ENSP00000501355.1,ENSP00000501521.1
Patient_ID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
01BR001,0.012367,-0.945999,,-0.478170,1.135840,-0.512706,0.750335,-0.274824,,-0.278244,...,-0.649127,-0.580869,-0.226667,,,-0.676185,-0.068202,-0.078207,-0.328420,
01BR008,-0.514386,0.462307,0.230124,-0.555968,0.491366,-0.656034,-1.220890,-0.369282,-1.036441,-0.059327,...,0.632221,,0.032873,,,-0.015459,0.227424,0.325643,-0.606240,
01BR009,-0.210782,-0.085055,0.380296,-0.389491,1.255391,-0.608007,-0.231318,0.092870,-1.505195,0.206595,...,0.450818,,-0.341503,,,-0.220239,0.125092,0.365397,-0.167392,
01BR010,0.105457,0.351335,-0.322798,-0.821610,0.241406,-0.500140,-0.137824,0.113791,,0.498314,...,-0.423470,,0.360900,,,-0.451556,-0.098897,0.208643,-0.729096,0.670307
01BR015,-0.509298,-0.874164,,-0.113804,-0.131347,-0.412813,0.262210,0.042333,,-0.657666,...,0.406016,-0.493869,-0.192847,,,0.083639,0.966976,-0.012664,0.081968,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21BR010,0.528298,-0.127929,-0.497360,-0.151022,0.082288,0.447267,0.151024,0.220194,0.516739,-0.230357,...,-0.172151,0.636608,0.267400,,-0.09507,-0.017522,-0.220463,-0.067717,-0.311446,0.602422
22BR005,-0.549542,0.134236,,0.580773,-0.080663,-0.056509,-0.148632,0.260986,,-0.348578,...,0.791937,,0.171712,,,0.083980,-0.200083,0.155198,,0.456801
22BR006,0.336092,0.125742,,-0.360510,0.086199,0.470607,-0.515990,-0.162247,1.003075,0.342987,...,-0.080755,,0.174904,-0.353412,,-0.013793,-0.253829,-0.117960,,1.094966
CPT000814,-0.518995,0.262582,0.277980,0.137505,0.600041,-1.041230,0.513974,-0.012011,,-0.411714,...,-2.011008,,0.035445,,,-1.385942,0.620827,,,
