{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/cellar/users/btsui/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning: Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" interactivity=interactivity, compiler=compiler, result=result)\n"
]
}
],
"source": [
"# Filtered dbSNP VCF dump read without a header row; the path is specific to the original author's machine.\n",
"inVcfDir='/data/cellardata/users/btsui/dbsnp/Homo_sapiens/All_20170710.f1_byte2_not_00.vcf.gz'\n",
"# Parse column 0 (chromosome) as text up front: pandas reported mixed types for it,\n",
"# which would otherwise leave a column holding both ints and strings.\n",
"vcfDf=pd.read_csv(inVcfDir,sep='\\t',header=None,dtype={0:str})\n",
"vcfDf.columns=['Chr','Pos','RsId','RefBase','AltBase','','','Annot']\n",
"# np.str was only an alias of the builtin str and has been removed from NumPy; use str directly.\n",
"vcfDf['Chr']=vcfDf['Chr'].astype(str)\n"
]
},
{
"cell_type": "code",
"execution_count": 150,
"metadata": {},
"outputs": [],
"source": [
"# NOTE(review): unpickling can execute arbitrary code; only load this file if its source is trusted.\n",
"# Keep the first 100 rows after dropping repeated vcfIndex values. The trailing .copy() makes the\n",
"# result an independent frame, so columns added to it later do not raise SettingWithCopyWarning.\n",
"top100GeneDf=pd.read_pickle('./top_lgg_somatic_sites.pickle').drop_duplicates('vcfIndex').head(n=100).copy()"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {},
"outputs": [],
"source": [
"#vcfDf[vcfDf.Pos==29944050]"
]
},
{
"cell_type": "code",
"execution_count": 178,
"metadata": {},
"outputs": [],
"source": [
"#top100GeneDf"
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
"
\n",
" \n",
" \n",
" \n",
" 70 | \n",
" 0.002137 | \n",
" 16 | \n",
" 2.042345 | \n",
" 0.495708 | \n",
" (1, 237591774) | \n",
" 1 | \n",
" 237591774 | \n",
" 3 | \n",
"
\n",
" \n",
" 79 | \n",
" 0.007407 | \n",
" 0 | \n",
" 1.307692 | \n",
" 0.481203 | \n",
" (12, 57099758) | \n",
" 12 | \n",
" 57099758 | \n",
" 3 | \n",
"
\n",
" \n",
" 86 | \n",
" 0.224100 | \n",
" 13 | \n",
" 1.728489 | \n",
" 0.588009 | \n",
" (3, 75630794) | \n",
" 3 | \n",
" 75630794 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"70 0.002137 16 2.042345 0.495708 (1, 237591774) 1 237591774 \n",
"79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n",
"86 0.224100 13 1.728489 0.588009 (3, 75630794) 3 75630794 \n",
"\n",
" tcga_wxs_count \n",
"70 3 \n",
"79 3 \n",
"86 3 "
]
},
"execution_count": 169,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows where rnaseq_n is below 20 and auprc is below 0.5.\n",
"top100GeneDf.loc[top100GeneDf['rnaseq_n'].lt(20)&top100GeneDf['auprc'].lt(0.5)]"
]
},
{
"cell_type": "code",
"execution_count": 170,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
"
\n",
" \n",
" \n",
" \n",
" 30 | \n",
" 0.116652 | \n",
" 481 | \n",
" 300.219466 | \n",
" 0.601301 | \n",
" (7, 55165350) | \n",
" 7 | \n",
" 55165350 | \n",
" 6 | \n",
"
\n",
" \n",
" 43 | \n",
" 0.072989 | \n",
" 401 | \n",
" 747.178357 | \n",
" 0.569311 | \n",
" (6, 31270232) | \n",
" 6 | \n",
" 31270232 | \n",
" 5 | \n",
"
\n",
" \n",
" 56 | \n",
" 0.035856 | \n",
" 355 | \n",
" 17.948375 | \n",
" 0.429899 | \n",
" (22, 42127537) | \n",
" 22 | \n",
" 42127537 | \n",
" 4 | \n",
"
\n",
" \n",
" 64 | \n",
" 0.025849 | \n",
" 454 | \n",
" 1296.377395 | \n",
" 0.552955 | \n",
" (6, 29944124) | \n",
" 6 | \n",
" 29944124 | \n",
" 4 | \n",
"
\n",
" \n",
" 66 | \n",
" 0.028601 | \n",
" 484 | \n",
" 1758.949290 | \n",
" 0.509784 | \n",
" (6, 31356377) | \n",
" 6 | \n",
" 31356377 | \n",
" 4 | \n",
"
\n",
" \n",
" 69 | \n",
" 0.069974 | \n",
" 516 | \n",
" 2769.652672 | \n",
" 0.599747 | \n",
" (6, 29944135) | \n",
" 6 | \n",
" 29944135 | \n",
" 3 | \n",
"
\n",
" \n",
" 70 | \n",
" 0.002137 | \n",
" 16 | \n",
" 2.042345 | \n",
" 0.495708 | \n",
" (1, 237591774) | \n",
" 1 | \n",
" 237591774 | \n",
" 3 | \n",
"
\n",
" \n",
" 74 | \n",
" 0.026316 | \n",
" 436 | \n",
" 162.306796 | \n",
" 0.784810 | \n",
" (6, 32664926) | \n",
" 6 | \n",
" 32664926 | \n",
" 3 | \n",
"
\n",
" \n",
" 79 | \n",
" 0.007407 | \n",
" 0 | \n",
" 1.307692 | \n",
" 0.481203 | \n",
" (12, 57099758) | \n",
" 12 | \n",
" 57099758 | \n",
" 3 | \n",
"
\n",
" \n",
" 84 | \n",
" 0.061988 | \n",
" 524 | \n",
" 2200.085878 | \n",
" 0.573877 | \n",
" (6, 29942916) | \n",
" 6 | \n",
" 29942916 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n",
"43 0.072989 401 747.178357 0.569311 (6, 31270232) 6 31270232 \n",
"56 0.035856 355 17.948375 0.429899 (22, 42127537) 22 42127537 \n",
"64 0.025849 454 1296.377395 0.552955 (6, 29944124) 6 29944124 \n",
"66 0.028601 484 1758.949290 0.509784 (6, 31356377) 6 31356377 \n",
"69 0.069974 516 2769.652672 0.599747 (6, 29944135) 6 29944135 \n",
"70 0.002137 16 2.042345 0.495708 (1, 237591774) 1 237591774 \n",
"74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n",
"79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n",
"84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n",
"\n",
" tcga_wxs_count \n",
"30 6 \n",
"43 5 \n",
"56 4 \n",
"64 4 \n",
"66 4 \n",
"69 3 \n",
"70 3 \n",
"74 3 \n",
"79 3 \n",
"84 3 "
]
},
"execution_count": 170,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows whose auprc is below 0.2.\n",
"top100GeneDf.loc[top100GeneDf['auprc'].lt(0.2)]"
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
"
\n",
" \n",
" \n",
" \n",
" 43 | \n",
" 0.072989 | \n",
" 401 | \n",
" 747.178357 | \n",
" 0.569311 | \n",
" (6, 31270232) | \n",
" 6 | \n",
" 31270232 | \n",
" 5 | \n",
"
\n",
" \n",
" 56 | \n",
" 0.035856 | \n",
" 355 | \n",
" 17.948375 | \n",
" 0.429899 | \n",
" (22, 42127537) | \n",
" 22 | \n",
" 42127537 | \n",
" 4 | \n",
"
\n",
" \n",
" 64 | \n",
" 0.025849 | \n",
" 454 | \n",
" 1296.377395 | \n",
" 0.552955 | \n",
" (6, 29944124) | \n",
" 6 | \n",
" 29944124 | \n",
" 4 | \n",
"
\n",
" \n",
" 66 | \n",
" 0.028601 | \n",
" 484 | \n",
" 1758.949290 | \n",
" 0.509784 | \n",
" (6, 31356377) | \n",
" 6 | \n",
" 31356377 | \n",
" 4 | \n",
"
\n",
" \n",
" 69 | \n",
" 0.069974 | \n",
" 516 | \n",
" 2769.652672 | \n",
" 0.599747 | \n",
" (6, 29944135) | \n",
" 6 | \n",
" 29944135 | \n",
" 3 | \n",
"
\n",
" \n",
" 70 | \n",
" 0.002137 | \n",
" 16 | \n",
" 2.042345 | \n",
" 0.495708 | \n",
" (1, 237591774) | \n",
" 1 | \n",
" 237591774 | \n",
" 3 | \n",
"
\n",
" \n",
" 74 | \n",
" 0.026316 | \n",
" 436 | \n",
" 162.306796 | \n",
" 0.784810 | \n",
" (6, 32664926) | \n",
" 6 | \n",
" 32664926 | \n",
" 3 | \n",
"
\n",
" \n",
" 79 | \n",
" 0.007407 | \n",
" 0 | \n",
" 1.307692 | \n",
" 0.481203 | \n",
" (12, 57099758) | \n",
" 12 | \n",
" 57099758 | \n",
" 3 | \n",
"
\n",
" \n",
" 84 | \n",
" 0.061988 | \n",
" 524 | \n",
" 2200.085878 | \n",
" 0.573877 | \n",
" (6, 29942916) | \n",
" 6 | \n",
" 29942916 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"43 0.072989 401 747.178357 0.569311 (6, 31270232) 6 31270232 \n",
"56 0.035856 355 17.948375 0.429899 (22, 42127537) 22 42127537 \n",
"64 0.025849 454 1296.377395 0.552955 (6, 29944124) 6 29944124 \n",
"66 0.028601 484 1758.949290 0.509784 (6, 31356377) 6 31356377 \n",
"69 0.069974 516 2769.652672 0.599747 (6, 29944135) 6 29944135 \n",
"70 0.002137 16 2.042345 0.495708 (1, 237591774) 1 237591774 \n",
"74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n",
"79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n",
"84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n",
"\n",
" tcga_wxs_count \n",
"43 5 \n",
"56 4 \n",
"64 4 \n",
"66 4 \n",
"69 3 \n",
"70 3 \n",
"74 3 \n",
"79 3 \n",
"84 3 "
]
},
"execution_count": 176,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows whose auprc is below 0.1.\n",
"top100GeneDf.loc[top100GeneDf['auprc'].lt(0.1)]"
]
},
{
"cell_type": "code",
"execution_count": 173,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.93"
]
},
"execution_count": 173,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fraction of rows with rnaseq_n greater than zero (mean of a boolean mask).\n",
"top100GeneDf['rnaseq_n'].gt(0).mean()"
]
},
{
"cell_type": "code",
"execution_count": 171,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.998211 | \n",
" 519 | \n",
" 133.822519 | \n",
" 0.984290 | \n",
" (2, 208248388) | \n",
" 2 | \n",
" 208248388 | \n",
" 371 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.543616 | \n",
" 520 | \n",
" 122.076336 | \n",
" 0.945266 | \n",
" (17, 7673803) | \n",
" 17 | \n",
" 7673803 | \n",
" 59 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.865149 | \n",
" 519 | \n",
" 133.375954 | \n",
" 0.896410 | \n",
" (2, 208248389) | \n",
" 2 | \n",
" 208248389 | \n",
" 38 | \n",
"
\n",
" \n",
" 5 | \n",
" 1.000000 | \n",
" 524 | \n",
" 353.841603 | \n",
" 1.000000 | \n",
" (15, 90088606) | \n",
" 15 | \n",
" 90088606 | \n",
" 15 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.954091 | \n",
" 520 | \n",
" 121.372849 | \n",
" 0.948889 | \n",
" (17, 7673802) | \n",
" 17 | \n",
" 7673802 | \n",
" 15 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.991546 | \n",
" 457 | \n",
" 38.312977 | \n",
" 0.507767 | \n",
" (14, 32092134) | \n",
" 14 | \n",
" 32092134 | \n",
" 14 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.738417 | \n",
" 0 | \n",
" 0.001931 | \n",
" 0.500000 | \n",
" (8, 142877758) | \n",
" 8 | \n",
" 142877758 | \n",
" 11 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.989537 | \n",
" 91 | \n",
" 5.262948 | \n",
" 0.503589 | \n",
" (X, 24789042) | \n",
" X | \n",
" 24789042 | \n",
" 11 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.999940 | \n",
" 513 | \n",
" 210.776718 | \n",
" 0.996541 | \n",
" (1, 109690516) | \n",
" 1 | \n",
" 109690516 | \n",
" 10 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.975154 | \n",
" 496 | \n",
" 59.135496 | \n",
" 0.999370 | \n",
" (17, 7674872) | \n",
" 17 | \n",
" 7674872 | \n",
" 10 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.684925 | \n",
" 286 | \n",
" 16.242366 | \n",
" 0.613307 | \n",
" (6, 29944050) | \n",
" 6 | \n",
" 29944050 | \n",
" 9 | \n",
"
\n",
" \n",
" 13 | \n",
" 0.998226 | \n",
" 524 | \n",
" 1291.374046 | \n",
" 0.694932 | \n",
" (X, 24788994) | \n",
" X | \n",
" 24788994 | \n",
" 9 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.994127 | \n",
" 431 | \n",
" 36.320537 | \n",
" 0.621622 | \n",
" (12, 6018369) | \n",
" 12 | \n",
" 6018369 | \n",
" 9 | \n",
"
\n",
" \n",
" 17 | \n",
" 0.779858 | \n",
" 509 | \n",
" 107.205374 | \n",
" 0.765568 | \n",
" (17, 7675088) | \n",
" 17 | \n",
" 7675088 | \n",
" 8 | \n",
"
\n",
" \n",
" 15 | \n",
" 0.895074 | \n",
" 520 | \n",
" 124.636711 | \n",
" 0.929366 | \n",
" (17, 7674220) | \n",
" 17 | \n",
" 7674220 | \n",
" 8 | \n",
"
\n",
" \n",
" 16 | \n",
" 0.850000 | \n",
" 510 | \n",
" 91.690840 | \n",
" 0.998394 | \n",
" (17, 7675076) | \n",
" 17 | \n",
" 7675076 | \n",
" 8 | \n",
"
\n",
" \n",
" 18 | \n",
" 0.924295 | \n",
" 502 | \n",
" 79.051527 | \n",
" 0.615436 | \n",
" (12, 6018901) | \n",
" 12 | \n",
" 6018901 | \n",
" 7 | \n",
"
\n",
" \n",
" 19 | \n",
" 0.967688 | \n",
" 524 | \n",
" 950.933206 | \n",
" 0.671795 | \n",
" (6, 31271836) | \n",
" 6 | \n",
" 31271836 | \n",
" 7 | \n",
"
\n",
" \n",
" 20 | \n",
" 0.524973 | \n",
" 502 | \n",
" 1370.792233 | \n",
" 0.456735 | \n",
" (6, 31356729) | \n",
" 6 | \n",
" 31356729 | \n",
" 7 | \n",
"
\n",
" \n",
" 21 | \n",
" 0.966475 | \n",
" 1 | \n",
" 0.392720 | \n",
" 0.500000 | \n",
" (7, 117548682) | \n",
" 7 | \n",
" 117548682 | \n",
" 7 | \n",
"
\n",
" \n",
" 22 | \n",
" 0.843596 | \n",
" 522 | \n",
" 2902.068702 | \n",
" 0.470996 | \n",
" (6, 29943406) | \n",
" 6 | \n",
" 29943406 | \n",
" 7 | \n",
"
\n",
" \n",
" 23 | \n",
" 0.624032 | \n",
" 480 | \n",
" 2319.403475 | \n",
" 0.517632 | \n",
" (6, 29943422) | \n",
" 6 | \n",
" 29943422 | \n",
" 7 | \n",
"
\n",
" \n",
" 24 | \n",
" 1.000000 | \n",
" 520 | \n",
" 123.956023 | \n",
" 1.000000 | \n",
" (17, 7674221) | \n",
" 17 | \n",
" 7674221 | \n",
" 7 | \n",
"
\n",
" \n",
" 25 | \n",
" 0.976915 | \n",
" 3 | \n",
" 0.319915 | \n",
" 0.498495 | \n",
" (7, 142750675) | \n",
" 7 | \n",
" 142750675 | \n",
" 7 | \n",
"
\n",
" \n",
" 31 | \n",
" 0.999499 | \n",
" 8 | \n",
" 2.092742 | \n",
" 0.781609 | \n",
" (12, 2685853) | \n",
" 12 | \n",
" 2685853 | \n",
" 6 | \n",
"
\n",
" \n",
" 33 | \n",
" 0.867048 | \n",
" 315 | \n",
" 18.980583 | \n",
" 0.640445 | \n",
" (6, 29944118) | \n",
" 6 | \n",
" 29944118 | \n",
" 6 | \n",
"
\n",
" \n",
" 32 | \n",
" 0.961239 | \n",
" 3 | \n",
" 0.492366 | \n",
" 0.479700 | \n",
" (4, 144120554) | \n",
" 4 | \n",
" 144120554 | \n",
" 6 | \n",
"
\n",
" \n",
" 30 | \n",
" 0.116652 | \n",
" 481 | \n",
" 300.219466 | \n",
" 0.601301 | \n",
" (7, 55165350) | \n",
" 7 | \n",
" 55165350 | \n",
" 6 | \n",
"
\n",
" \n",
" 28 | \n",
" 0.501689 | \n",
" 472 | \n",
" 84.025794 | \n",
" 0.500000 | \n",
" (9, 128257486) | \n",
" 9 | \n",
" 128257486 | \n",
" 6 | \n",
"
\n",
" \n",
" 27 | \n",
" 0.777019 | \n",
" 518 | \n",
" 121.843511 | \n",
" 0.875674 | \n",
" (17, 7673776) | \n",
" 17 | \n",
" 7673776 | \n",
" 6 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 92 | \n",
" 1.000000 | \n",
" 517 | \n",
" 117.526718 | \n",
" 1.000000 | \n",
" (17, 7674256) | \n",
" 17 | \n",
" 7674256 | \n",
" 3 | \n",
"
\n",
" \n",
" 93 | \n",
" 0.493328 | \n",
" 512 | \n",
" 82.120229 | \n",
" 0.692187 | \n",
" (17, 7673704) | \n",
" 17 | \n",
" 7673704 | \n",
" 3 | \n",
"
\n",
" \n",
" 97 | \n",
" 0.671406 | \n",
" 501 | \n",
" 94.854127 | \n",
" 0.794175 | \n",
" (17, 31350209) | \n",
" 17 | \n",
" 31350209 | \n",
" 3 | \n",
"
\n",
" \n",
" 95 | \n",
" 0.666459 | \n",
" 523 | \n",
" 986.114504 | \n",
" 0.500538 | \n",
" (8, 100709671) | \n",
" 8 | \n",
" 100709671 | \n",
" 3 | \n",
"
\n",
" \n",
" 96 | \n",
" 0.951125 | \n",
" 0 | \n",
" 0.047244 | \n",
" 0.509217 | \n",
" (3, 183959847) | \n",
" 3 | \n",
" 183959847 | \n",
" 3 | \n",
"
\n",
" \n",
" 86 | \n",
" 0.224100 | \n",
" 13 | \n",
" 1.728489 | \n",
" 0.588009 | \n",
" (3, 75630794) | \n",
" 3 | \n",
" 75630794 | \n",
" 3 | \n",
"
\n",
" \n",
" 98 | \n",
" 0.530668 | \n",
" 518 | \n",
" 380.994275 | \n",
" 0.738304 | \n",
" (5, 68295269) | \n",
" 5 | \n",
" 68295269 | \n",
" 3 | \n",
"
\n",
" \n",
" 99 | \n",
" 0.870781 | \n",
" 495 | \n",
" 23.572519 | \n",
" 0.462571 | \n",
" (22, 24627926) | \n",
" 22 | \n",
" 24627926 | \n",
" 3 | \n",
"
\n",
" \n",
" 100 | \n",
" 0.754902 | \n",
" 0 | \n",
" 0.001957 | \n",
" 0.500000 | \n",
" (20, 8788776) | \n",
" 20 | \n",
" 8788776 | \n",
" 3 | \n",
"
\n",
" \n",
" 101 | \n",
" 0.289047 | \n",
" 205 | \n",
" 13.273622 | \n",
" 0.551120 | \n",
" (6, 31271875) | \n",
" 6 | \n",
" 31271875 | \n",
" 3 | \n",
"
\n",
" \n",
" 102 | \n",
" 0.318593 | \n",
" 524 | \n",
" 977.551527 | \n",
" 0.504434 | \n",
" (6, 31271839) | \n",
" 6 | \n",
" 31271839 | \n",
" 3 | \n",
"
\n",
" \n",
" 87 | \n",
" 0.549532 | \n",
" 103 | \n",
" 7.149510 | \n",
" 0.523889 | \n",
" (3, 49686483) | \n",
" 3 | \n",
" 49686483 | \n",
" 3 | \n",
"
\n",
" \n",
" 83 | \n",
" 0.965023 | \n",
" 510 | \n",
" 2437.399610 | \n",
" 0.856497 | \n",
" (6, 29942858) | \n",
" 6 | \n",
" 29942858 | \n",
" 3 | \n",
"
\n",
" \n",
" 85 | \n",
" 0.756602 | \n",
" 29 | \n",
" 2.614504 | \n",
" 0.669605 | \n",
" (3, 75630855) | \n",
" 3 | \n",
" 75630855 | \n",
" 3 | \n",
"
\n",
" \n",
" 74 | \n",
" 0.026316 | \n",
" 436 | \n",
" 162.306796 | \n",
" 0.784810 | \n",
" (6, 32664926) | \n",
" 6 | \n",
" 32664926 | \n",
" 3 | \n",
"
\n",
" \n",
" 68 | \n",
" 0.380695 | \n",
" 512 | \n",
" 2479.959924 | \n",
" 0.497330 | \n",
" (6, 29944132) | \n",
" 6 | \n",
" 29944132 | \n",
" 3 | \n",
"
\n",
" \n",
" 69 | \n",
" 0.069974 | \n",
" 516 | \n",
" 2769.652672 | \n",
" 0.599747 | \n",
" (6, 29944135) | \n",
" 6 | \n",
" 29944135 | \n",
" 3 | \n",
"
\n",
" \n",
" 70 | \n",
" 0.002137 | \n",
" 16 | \n",
" 2.042345 | \n",
" 0.495708 | \n",
" (1, 237591774) | \n",
" 1 | \n",
" 237591774 | \n",
" 3 | \n",
"
\n",
" \n",
" 71 | \n",
" 0.808434 | \n",
" 524 | \n",
" 3416.650763 | \n",
" 0.442027 | \n",
" (6, 29944376) | \n",
" 6 | \n",
" 29944376 | \n",
" 3 | \n",
"
\n",
" \n",
" 72 | \n",
" 0.232668 | \n",
" 524 | \n",
" 3618.646947 | \n",
" 0.603043 | \n",
" (6, 29944151) | \n",
" 6 | \n",
" 29944151 | \n",
" 3 | \n",
"
\n",
" \n",
" 73 | \n",
" 0.741291 | \n",
" 506 | \n",
" 315.198473 | \n",
" 0.621487 | \n",
" (6, 32664883) | \n",
" 6 | \n",
" 32664883 | \n",
" 3 | \n",
"
\n",
" \n",
" 75 | \n",
" 0.863203 | \n",
" 395 | \n",
" 31.936902 | \n",
" 0.488386 | \n",
" (10, 4999206) | \n",
" 10 | \n",
" 4999206 | \n",
" 3 | \n",
"
\n",
" \n",
" 84 | \n",
" 0.061988 | \n",
" 524 | \n",
" 2200.085878 | \n",
" 0.573877 | \n",
" (6, 29942916) | \n",
" 6 | \n",
" 29942916 | \n",
" 3 | \n",
"
\n",
" \n",
" 76 | \n",
" 0.815908 | \n",
" 493 | \n",
" 215.211832 | \n",
" 0.928361 | \n",
" (6, 32661393) | \n",
" 6 | \n",
" 32661393 | \n",
" 3 | \n",
"
\n",
" \n",
" 77 | \n",
" 0.380747 | \n",
" 480 | \n",
" 191.682692 | \n",
" 0.596527 | \n",
" (6, 32661384) | \n",
" 6 | \n",
" 32661384 | \n",
" 3 | \n",
"
\n",
" \n",
" 78 | \n",
" 0.778990 | \n",
" 27 | \n",
" 2.581081 | \n",
" 0.645061 | \n",
" (6, 32661333) | \n",
" 6 | \n",
" 32661333 | \n",
" 3 | \n",
"
\n",
" \n",
" 79 | \n",
" 0.007407 | \n",
" 0 | \n",
" 1.307692 | \n",
" 0.481203 | \n",
" (12, 57099758) | \n",
" 12 | \n",
" 57099758 | \n",
" 3 | \n",
"
\n",
" \n",
" 81 | \n",
" 0.993934 | \n",
" 523 | \n",
" 1127.133588 | \n",
" 0.582806 | \n",
" (16, 2106849) | \n",
" 16 | \n",
" 2106849 | \n",
" 3 | \n",
"
\n",
" \n",
" 82 | \n",
" 0.995522 | \n",
" 517 | \n",
" 1391.395753 | \n",
" 0.989221 | \n",
" (6, 29942795) | \n",
" 6 | \n",
" 29942795 | \n",
" 3 | \n",
"
\n",
" \n",
" 103 | \n",
" 0.953258 | \n",
" 522 | \n",
" 2469.611111 | \n",
" 0.582500 | \n",
" (6, 31356399) | \n",
" 6 | \n",
" 31356399 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
100 rows × 8 columns
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n",
"1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n",
"2 0.865149 519 133.375954 0.896410 (2, 208248389) 2 208248389 \n",
"5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n",
"6 0.954091 520 121.372849 0.948889 (17, 7673802) 17 7673802 \n",
"3 0.991546 457 38.312977 0.507767 (14, 32092134) 14 32092134 \n",
"7 0.738417 0 0.001931 0.500000 (8, 142877758) 8 142877758 \n",
"8 0.989537 91 5.262948 0.503589 (X, 24789042) X 24789042 \n",
"9 0.999940 513 210.776718 0.996541 (1, 109690516) 1 109690516 \n",
"10 0.975154 496 59.135496 0.999370 (17, 7674872) 17 7674872 \n",
"11 0.684925 286 16.242366 0.613307 (6, 29944050) 6 29944050 \n",
"13 0.998226 524 1291.374046 0.694932 (X, 24788994) X 24788994 \n",
"14 0.994127 431 36.320537 0.621622 (12, 6018369) 12 6018369 \n",
"17 0.779858 509 107.205374 0.765568 (17, 7675088) 17 7675088 \n",
"15 0.895074 520 124.636711 0.929366 (17, 7674220) 17 7674220 \n",
"16 0.850000 510 91.690840 0.998394 (17, 7675076) 17 7675076 \n",
"18 0.924295 502 79.051527 0.615436 (12, 6018901) 12 6018901 \n",
"19 0.967688 524 950.933206 0.671795 (6, 31271836) 6 31271836 \n",
"20 0.524973 502 1370.792233 0.456735 (6, 31356729) 6 31356729 \n",
"21 0.966475 1 0.392720 0.500000 (7, 117548682) 7 117548682 \n",
"22 0.843596 522 2902.068702 0.470996 (6, 29943406) 6 29943406 \n",
"23 0.624032 480 2319.403475 0.517632 (6, 29943422) 6 29943422 \n",
"24 1.000000 520 123.956023 1.000000 (17, 7674221) 17 7674221 \n",
"25 0.976915 3 0.319915 0.498495 (7, 142750675) 7 142750675 \n",
"31 0.999499 8 2.092742 0.781609 (12, 2685853) 12 2685853 \n",
"33 0.867048 315 18.980583 0.640445 (6, 29944118) 6 29944118 \n",
"32 0.961239 3 0.492366 0.479700 (4, 144120554) 4 144120554 \n",
"30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n",
"28 0.501689 472 84.025794 0.500000 (9, 128257486) 9 128257486 \n",
"27 0.777019 518 121.843511 0.875674 (17, 7673776) 17 7673776 \n",
".. ... ... ... ... ... .. ... \n",
"92 1.000000 517 117.526718 1.000000 (17, 7674256) 17 7674256 \n",
"93 0.493328 512 82.120229 0.692187 (17, 7673704) 17 7673704 \n",
"97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n",
"95 0.666459 523 986.114504 0.500538 (8, 100709671) 8 100709671 \n",
"96 0.951125 0 0.047244 0.509217 (3, 183959847) 3 183959847 \n",
"86 0.224100 13 1.728489 0.588009 (3, 75630794) 3 75630794 \n",
"98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n",
"99 0.870781 495 23.572519 0.462571 (22, 24627926) 22 24627926 \n",
"100 0.754902 0 0.001957 0.500000 (20, 8788776) 20 8788776 \n",
"101 0.289047 205 13.273622 0.551120 (6, 31271875) 6 31271875 \n",
"102 0.318593 524 977.551527 0.504434 (6, 31271839) 6 31271839 \n",
"87 0.549532 103 7.149510 0.523889 (3, 49686483) 3 49686483 \n",
"83 0.965023 510 2437.399610 0.856497 (6, 29942858) 6 29942858 \n",
"85 0.756602 29 2.614504 0.669605 (3, 75630855) 3 75630855 \n",
"74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n",
"68 0.380695 512 2479.959924 0.497330 (6, 29944132) 6 29944132 \n",
"69 0.069974 516 2769.652672 0.599747 (6, 29944135) 6 29944135 \n",
"70 0.002137 16 2.042345 0.495708 (1, 237591774) 1 237591774 \n",
"71 0.808434 524 3416.650763 0.442027 (6, 29944376) 6 29944376 \n",
"72 0.232668 524 3618.646947 0.603043 (6, 29944151) 6 29944151 \n",
"73 0.741291 506 315.198473 0.621487 (6, 32664883) 6 32664883 \n",
"75 0.863203 395 31.936902 0.488386 (10, 4999206) 10 4999206 \n",
"84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n",
"76 0.815908 493 215.211832 0.928361 (6, 32661393) 6 32661393 \n",
"77 0.380747 480 191.682692 0.596527 (6, 32661384) 6 32661384 \n",
"78 0.778990 27 2.581081 0.645061 (6, 32661333) 6 32661333 \n",
"79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n",
"81 0.993934 523 1127.133588 0.582806 (16, 2106849) 16 2106849 \n",
"82 0.995522 517 1391.395753 0.989221 (6, 29942795) 6 29942795 \n",
"103 0.953258 522 2469.611111 0.582500 (6, 31356399) 6 31356399 \n",
"\n",
" tcga_wxs_count \n",
"0 371 \n",
"1 59 \n",
"2 38 \n",
"5 15 \n",
"6 15 \n",
"3 14 \n",
"7 11 \n",
"8 11 \n",
"9 10 \n",
"10 10 \n",
"11 9 \n",
"13 9 \n",
"14 9 \n",
"17 8 \n",
"15 8 \n",
"16 8 \n",
"18 7 \n",
"19 7 \n",
"20 7 \n",
"21 7 \n",
"22 7 \n",
"23 7 \n",
"24 7 \n",
"25 7 \n",
"31 6 \n",
"33 6 \n",
"32 6 \n",
"30 6 \n",
"28 6 \n",
"27 6 \n",
".. ... \n",
"92 3 \n",
"93 3 \n",
"97 3 \n",
"95 3 \n",
"96 3 \n",
"86 3 \n",
"98 3 \n",
"99 3 \n",
"100 3 \n",
"101 3 \n",
"102 3 \n",
"87 3 \n",
"83 3 \n",
"85 3 \n",
"74 3 \n",
"68 3 \n",
"69 3 \n",
"70 3 \n",
"71 3 \n",
"72 3 \n",
"73 3 \n",
"75 3 \n",
"84 3 \n",
"76 3 \n",
"77 3 \n",
"78 3 \n",
"79 3 \n",
"81 3 \n",
"82 3 \n",
"103 3 \n",
"\n",
"[100 rows x 8 columns]"
]
},
"execution_count": 171,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the table ordered from the highest tcga_wxs_count to the lowest.\n",
"top100GeneDf.sort_values(by=['tcga_wxs_count'],ascending=[False])"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"# Pull the first gene symbol out of the GENEINFO annotation. The capture runs up to the ':' that\n",
"# separates symbol from gene id, so hyphenated symbols (e.g. HLA-A) are kept whole; the previous\n",
"# \\\\w+ pattern stopped at the hyphen and collapsed them all to 'HLA'.\n",
"# NOTE(review): assumes the dbSNP layout GENEINFO=symbol:id|symbol:id;... - confirm against the VCF header.\n",
"vcfDf['GeneName']=vcfDf['Annot'].str.extract(r'GENEINFO=([^:|;]+)',expand=False)"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"# Lookup Series indexed by (Chr, Pos) giving GeneName; only the first row per position is kept.\n",
"posToGeneNameS=vcfDf.drop_duplicates(subset=['Chr','Pos'],keep='first').set_index(['Chr','Pos']).loc[:,'GeneName']"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [],
"source": [
"# Split each vcfIndex entry into separate columns: element 0 becomes Chr, element 1 becomes Pos (as int).\n",
"top100GeneDf['Chr']=top100GeneDf['vcfIndex'].map(lambda siteKey:siteKey[0])\n",
"top100GeneDf['Pos']=top100GeneDf['vcfIndex'].map(lambda siteKey:int(siteKey[1]))"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [],
"source": [
"# Attach the gene symbol for each (Chr, Pos) site. reindex returns NaN for a site that is absent\n",
"# from posToGeneNameS, whereas indexing with a list of labels raises KeyError on any missing key.\n",
"# .values assigns positionally, so the row order of top100GeneDf is what matters here.\n",
"top100GeneDf['Gene']=posToGeneNameS.reindex(top100GeneDf.set_index(['Chr','Pos']).index).values"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
" Gene | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.998211 | \n",
" 519 | \n",
" 133.822519 | \n",
" 0.984290 | \n",
" (2, 208248388) | \n",
" 2 | \n",
" 208248388 | \n",
" 371 | \n",
" IDH1 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.543616 | \n",
" 520 | \n",
" 122.076336 | \n",
" 0.945266 | \n",
" (17, 7673803) | \n",
" 17 | \n",
" 7673803 | \n",
" 59 | \n",
" TP53 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.865149 | \n",
" 519 | \n",
" 133.375954 | \n",
" 0.896410 | \n",
" (2, 208248389) | \n",
" 2 | \n",
" 208248389 | \n",
" 38 | \n",
" IDH1 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.991546 | \n",
" 457 | \n",
" 38.312977 | \n",
" 0.507767 | \n",
" (14, 32092134) | \n",
" 14 | \n",
" 32092134 | \n",
" 14 | \n",
" ARHGAP5 | \n",
"
\n",
" \n",
" 5 | \n",
" 1.000000 | \n",
" 524 | \n",
" 353.841603 | \n",
" 1.000000 | \n",
" (15, 90088606) | \n",
" 15 | \n",
" 90088606 | \n",
" 15 | \n",
" IDH2 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.954091 | \n",
" 520 | \n",
" 121.372849 | \n",
" 0.948889 | \n",
" (17, 7673802) | \n",
" 17 | \n",
" 7673802 | \n",
" 15 | \n",
" TP53 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.738417 | \n",
" 0 | \n",
" 0.001931 | \n",
" 0.500000 | \n",
" (8, 142877758) | \n",
" 8 | \n",
" 142877758 | \n",
" 11 | \n",
" CYP11B1 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.989537 | \n",
" 91 | \n",
" 5.262948 | \n",
" 0.503589 | \n",
" (X, 24789042) | \n",
" X | \n",
" 24789042 | \n",
" 11 | \n",
" POLA1 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.999940 | \n",
" 513 | \n",
" 210.776718 | \n",
" 0.996541 | \n",
" (1, 109690516) | \n",
" 1 | \n",
" 109690516 | \n",
" 10 | \n",
" GSTM1 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.975154 | \n",
" 496 | \n",
" 59.135496 | \n",
" 0.999370 | \n",
" (17, 7674872) | \n",
" 17 | \n",
" 7674872 | \n",
" 10 | \n",
" TP53 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.684925 | \n",
" 286 | \n",
" 16.242366 | \n",
" 0.613307 | \n",
" (6, 29944050) | \n",
" 6 | \n",
" 29944050 | \n",
" 9 | \n",
" HLA | \n",
"
\n",
" \n",
" 13 | \n",
" 0.998226 | \n",
" 524 | \n",
" 1291.374046 | \n",
" 0.694932 | \n",
" (X, 24788994) | \n",
" X | \n",
" 24788994 | \n",
" 9 | \n",
" POLA1 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.994127 | \n",
" 431 | \n",
" 36.320537 | \n",
" 0.621622 | \n",
" (12, 6018369) | \n",
" 12 | \n",
" 6018369 | \n",
" 9 | \n",
" VWF | \n",
"
\n",
" \n",
" 15 | \n",
" 0.895074 | \n",
" 520 | \n",
" 124.636711 | \n",
" 0.929366 | \n",
" (17, 7674220) | \n",
" 17 | \n",
" 7674220 | \n",
" 8 | \n",
" TP53 | \n",
"
\n",
" \n",
" 16 | \n",
" 0.850000 | \n",
" 510 | \n",
" 91.690840 | \n",
" 0.998394 | \n",
" (17, 7675076) | \n",
" 17 | \n",
" 7675076 | \n",
" 8 | \n",
" TP53 | \n",
"
\n",
" \n",
" 17 | \n",
" 0.779858 | \n",
" 509 | \n",
" 107.205374 | \n",
" 0.765568 | \n",
" (17, 7675088) | \n",
" 17 | \n",
" 7675088 | \n",
" 8 | \n",
" TP53 | \n",
"
\n",
" \n",
" 18 | \n",
" 0.924295 | \n",
" 502 | \n",
" 79.051527 | \n",
" 0.615436 | \n",
" (12, 6018901) | \n",
" 12 | \n",
" 6018901 | \n",
" 7 | \n",
" VWF | \n",
"
\n",
" \n",
" 19 | \n",
" 0.967688 | \n",
" 524 | \n",
" 950.933206 | \n",
" 0.671795 | \n",
" (6, 31271836) | \n",
" 6 | \n",
" 31271836 | \n",
" 7 | \n",
" HLA | \n",
"
\n",
" \n",
" 20 | \n",
" 0.524973 | \n",
" 502 | \n",
" 1370.792233 | \n",
" 0.456735 | \n",
" (6, 31356729) | \n",
" 6 | \n",
" 31356729 | \n",
" 7 | \n",
" MIR6891 | \n",
"
\n",
" \n",
" 21 | \n",
" 0.966475 | \n",
" 1 | \n",
" 0.392720 | \n",
" 0.500000 | \n",
" (7, 117548682) | \n",
" 7 | \n",
" 117548682 | \n",
" 7 | \n",
" CFTR | \n",
"
\n",
" \n",
" 22 | \n",
" 0.843596 | \n",
" 522 | \n",
" 2902.068702 | \n",
" 0.470996 | \n",
" (6, 29943406) | \n",
" 6 | \n",
" 29943406 | \n",
" 7 | \n",
" HLA | \n",
"
\n",
" \n",
" 23 | \n",
" 0.624032 | \n",
" 480 | \n",
" 2319.403475 | \n",
" 0.517632 | \n",
" (6, 29943422) | \n",
" 6 | \n",
" 29943422 | \n",
" 7 | \n",
" HLA | \n",
"
\n",
" \n",
" 24 | \n",
" 1.000000 | \n",
" 520 | \n",
" 123.956023 | \n",
" 1.000000 | \n",
" (17, 7674221) | \n",
" 17 | \n",
" 7674221 | \n",
" 7 | \n",
" TP53 | \n",
"
\n",
" \n",
" 25 | \n",
" 0.976915 | \n",
" 3 | \n",
" 0.319915 | \n",
" 0.498495 | \n",
" (7, 142750675) | \n",
" 7 | \n",
" 142750675 | \n",
" 7 | \n",
" PRSS1 | \n",
"
\n",
" \n",
" 26 | \n",
" 0.881503 | \n",
" 212 | \n",
" 9.790076 | \n",
" 0.500000 | \n",
" (7, 152238825) | \n",
" 7 | \n",
" 152238825 | \n",
" 6 | \n",
" KMT2C | \n",
"
\n",
" \n",
" 27 | \n",
" 0.777019 | \n",
" 518 | \n",
" 121.843511 | \n",
" 0.875674 | \n",
" (17, 7673776) | \n",
" 17 | \n",
" 7673776 | \n",
" 6 | \n",
" TP53 | \n",
"
\n",
" \n",
" 28 | \n",
" 0.501689 | \n",
" 472 | \n",
" 84.025794 | \n",
" 0.500000 | \n",
" (9, 128257486) | \n",
" 9 | \n",
" 128257486 | \n",
" 6 | \n",
" GOLGA2 | \n",
"
\n",
" \n",
" 29 | \n",
" 0.962142 | \n",
" 4 | \n",
" 0.215953 | \n",
" 0.501053 | \n",
" (7, 142750600) | \n",
" 7 | \n",
" 142750600 | \n",
" 6 | \n",
" PRSS1 | \n",
"
\n",
" \n",
" 30 | \n",
" 0.116652 | \n",
" 481 | \n",
" 300.219466 | \n",
" 0.601301 | \n",
" (7, 55165350) | \n",
" 7 | \n",
" 55165350 | \n",
" 6 | \n",
" EGFR | \n",
"
\n",
" \n",
" 31 | \n",
" 0.999499 | \n",
" 8 | \n",
" 2.092742 | \n",
" 0.781609 | \n",
" (12, 2685853) | \n",
" 12 | \n",
" 2685853 | \n",
" 6 | \n",
" CACNA1C | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 73 | \n",
" 0.741291 | \n",
" 506 | \n",
" 315.198473 | \n",
" 0.621487 | \n",
" (6, 32664883) | \n",
" 6 | \n",
" 32664883 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 74 | \n",
" 0.026316 | \n",
" 436 | \n",
" 162.306796 | \n",
" 0.784810 | \n",
" (6, 32664926) | \n",
" 6 | \n",
" 32664926 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 75 | \n",
" 0.863203 | \n",
" 395 | \n",
" 31.936902 | \n",
" 0.488386 | \n",
" (10, 4999206) | \n",
" 10 | \n",
" 4999206 | \n",
" 3 | \n",
" AKR1C2 | \n",
"
\n",
" \n",
" 76 | \n",
" 0.815908 | \n",
" 493 | \n",
" 215.211832 | \n",
" 0.928361 | \n",
" (6, 32661393) | \n",
" 6 | \n",
" 32661393 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 77 | \n",
" 0.380747 | \n",
" 480 | \n",
" 191.682692 | \n",
" 0.596527 | \n",
" (6, 32661384) | \n",
" 6 | \n",
" 32661384 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 78 | \n",
" 0.778990 | \n",
" 27 | \n",
" 2.581081 | \n",
" 0.645061 | \n",
" (6, 32661333) | \n",
" 6 | \n",
" 32661333 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 79 | \n",
" 0.007407 | \n",
" 0 | \n",
" 1.307692 | \n",
" 0.481203 | \n",
" (12, 57099758) | \n",
" 12 | \n",
" 57099758 | \n",
" 3 | \n",
" STAT6 | \n",
"
\n",
" \n",
" 81 | \n",
" 0.993934 | \n",
" 523 | \n",
" 1127.133588 | \n",
" 0.582806 | \n",
" (16, 2106849) | \n",
" 16 | \n",
" 2106849 | \n",
" 3 | \n",
" MIR6511B1 | \n",
"
\n",
" \n",
" 82 | \n",
" 0.995522 | \n",
" 517 | \n",
" 1391.395753 | \n",
" 0.989221 | \n",
" (6, 29942795) | \n",
" 6 | \n",
" 29942795 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 83 | \n",
" 0.965023 | \n",
" 510 | \n",
" 2437.399610 | \n",
" 0.856497 | \n",
" (6, 29942858) | \n",
" 6 | \n",
" 29942858 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 84 | \n",
" 0.061988 | \n",
" 524 | \n",
" 2200.085878 | \n",
" 0.573877 | \n",
" (6, 29942916) | \n",
" 6 | \n",
" 29942916 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 85 | \n",
" 0.756602 | \n",
" 29 | \n",
" 2.614504 | \n",
" 0.669605 | \n",
" (3, 75630855) | \n",
" 3 | \n",
" 75630855 | \n",
" 3 | \n",
" LOC107986102 | \n",
"
\n",
" \n",
" 86 | \n",
" 0.224100 | \n",
" 13 | \n",
" 1.728489 | \n",
" 0.588009 | \n",
" (3, 75630794) | \n",
" 3 | \n",
" 75630794 | \n",
" 3 | \n",
" LOC107986102 | \n",
"
\n",
" \n",
" 87 | \n",
" 0.549532 | \n",
" 103 | \n",
" 7.149510 | \n",
" 0.523889 | \n",
" (3, 49686483) | \n",
" 3 | \n",
" 49686483 | \n",
" 3 | \n",
" MST1 | \n",
"
\n",
" \n",
" 88 | \n",
" 0.683612 | \n",
" 519 | \n",
" 1443.973282 | \n",
" 0.499179 | \n",
" (6, 29943463) | \n",
" 6 | \n",
" 29943463 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 89 | \n",
" 0.758810 | \n",
" 35 | \n",
" 3.059961 | \n",
" 0.520303 | \n",
" (6, 29943667) | \n",
" 6 | \n",
" 29943667 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 90 | \n",
" 0.858112 | \n",
" 523 | \n",
" 66.984733 | \n",
" 0.899413 | \n",
" (3, 179234284) | \n",
" 3 | \n",
" 179234284 | \n",
" 3 | \n",
" PIK3CA | \n",
"
\n",
" \n",
" 91 | \n",
" 0.844742 | \n",
" 513 | \n",
" 88.143130 | \n",
" 0.874750 | \n",
" (17, 7674888) | \n",
" 17 | \n",
" 7674888 | \n",
" 3 | \n",
" TP53 | \n",
"
\n",
" \n",
" 92 | \n",
" 1.000000 | \n",
" 517 | \n",
" 117.526718 | \n",
" 1.000000 | \n",
" (17, 7674256) | \n",
" 17 | \n",
" 7674256 | \n",
" 3 | \n",
" TP53 | \n",
"
\n",
" \n",
" 93 | \n",
" 0.493328 | \n",
" 512 | \n",
" 82.120229 | \n",
" 0.692187 | \n",
" (17, 7673704) | \n",
" 17 | \n",
" 7673704 | \n",
" 3 | \n",
" TP53 | \n",
"
\n",
" \n",
" 94 | \n",
" 0.403970 | \n",
" 55 | \n",
" 3.786275 | \n",
" 0.490953 | \n",
" (17, 21416556) | \n",
" 17 | \n",
" 21416556 | \n",
" 3 | \n",
" KCNJ12 | \n",
"
\n",
" \n",
" 95 | \n",
" 0.666459 | \n",
" 523 | \n",
" 986.114504 | \n",
" 0.500538 | \n",
" (8, 100709671) | \n",
" 8 | \n",
" 100709671 | \n",
" 3 | \n",
" PABPC1 | \n",
"
\n",
" \n",
" 96 | \n",
" 0.951125 | \n",
" 0 | \n",
" 0.047244 | \n",
" 0.509217 | \n",
" (3, 183959847) | \n",
" 3 | \n",
" 183959847 | \n",
" 3 | \n",
" ABCC5 | \n",
"
\n",
" \n",
" 97 | \n",
" 0.671406 | \n",
" 501 | \n",
" 94.854127 | \n",
" 0.794175 | \n",
" (17, 31350209) | \n",
" 17 | \n",
" 31350209 | \n",
" 3 | \n",
" NF1 | \n",
"
\n",
" \n",
" 98 | \n",
" 0.530668 | \n",
" 518 | \n",
" 380.994275 | \n",
" 0.738304 | \n",
" (5, 68295269) | \n",
" 5 | \n",
" 68295269 | \n",
" 3 | \n",
" PIK3R1 | \n",
"
\n",
" \n",
" 99 | \n",
" 0.870781 | \n",
" 495 | \n",
" 23.572519 | \n",
" 0.462571 | \n",
" (22, 24627926) | \n",
" 22 | \n",
" 24627926 | \n",
" 3 | \n",
" GGT1 | \n",
"
\n",
" \n",
" 100 | \n",
" 0.754902 | \n",
" 0 | \n",
" 0.001957 | \n",
" 0.500000 | \n",
" (20, 8788776) | \n",
" 20 | \n",
" 8788776 | \n",
" 3 | \n",
" PLCB1 | \n",
"
\n",
" \n",
" 101 | \n",
" 0.289047 | \n",
" 205 | \n",
" 13.273622 | \n",
" 0.551120 | \n",
" (6, 31271875) | \n",
" 6 | \n",
" 31271875 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 102 | \n",
" 0.318593 | \n",
" 524 | \n",
" 977.551527 | \n",
" 0.504434 | \n",
" (6, 31271839) | \n",
" 6 | \n",
" 31271839 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 103 | \n",
" 0.953258 | \n",
" 522 | \n",
" 2469.611111 | \n",
" 0.582500 | \n",
" (6, 31356399) | \n",
" 6 | \n",
" 31356399 | \n",
" 3 | \n",
" MIR6891 | \n",
"
\n",
" \n",
"
\n",
"
100 rows × 9 columns
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n",
"1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n",
"2 0.865149 519 133.375954 0.896410 (2, 208248389) 2 208248389 \n",
"3 0.991546 457 38.312977 0.507767 (14, 32092134) 14 32092134 \n",
"5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n",
"6 0.954091 520 121.372849 0.948889 (17, 7673802) 17 7673802 \n",
"7 0.738417 0 0.001931 0.500000 (8, 142877758) 8 142877758 \n",
"8 0.989537 91 5.262948 0.503589 (X, 24789042) X 24789042 \n",
"9 0.999940 513 210.776718 0.996541 (1, 109690516) 1 109690516 \n",
"10 0.975154 496 59.135496 0.999370 (17, 7674872) 17 7674872 \n",
"11 0.684925 286 16.242366 0.613307 (6, 29944050) 6 29944050 \n",
"13 0.998226 524 1291.374046 0.694932 (X, 24788994) X 24788994 \n",
"14 0.994127 431 36.320537 0.621622 (12, 6018369) 12 6018369 \n",
"15 0.895074 520 124.636711 0.929366 (17, 7674220) 17 7674220 \n",
"16 0.850000 510 91.690840 0.998394 (17, 7675076) 17 7675076 \n",
"17 0.779858 509 107.205374 0.765568 (17, 7675088) 17 7675088 \n",
"18 0.924295 502 79.051527 0.615436 (12, 6018901) 12 6018901 \n",
"19 0.967688 524 950.933206 0.671795 (6, 31271836) 6 31271836 \n",
"20 0.524973 502 1370.792233 0.456735 (6, 31356729) 6 31356729 \n",
"21 0.966475 1 0.392720 0.500000 (7, 117548682) 7 117548682 \n",
"22 0.843596 522 2902.068702 0.470996 (6, 29943406) 6 29943406 \n",
"23 0.624032 480 2319.403475 0.517632 (6, 29943422) 6 29943422 \n",
"24 1.000000 520 123.956023 1.000000 (17, 7674221) 17 7674221 \n",
"25 0.976915 3 0.319915 0.498495 (7, 142750675) 7 142750675 \n",
"26 0.881503 212 9.790076 0.500000 (7, 152238825) 7 152238825 \n",
"27 0.777019 518 121.843511 0.875674 (17, 7673776) 17 7673776 \n",
"28 0.501689 472 84.025794 0.500000 (9, 128257486) 9 128257486 \n",
"29 0.962142 4 0.215953 0.501053 (7, 142750600) 7 142750600 \n",
"30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n",
"31 0.999499 8 2.092742 0.781609 (12, 2685853) 12 2685853 \n",
".. ... ... ... ... ... .. ... \n",
"73 0.741291 506 315.198473 0.621487 (6, 32664883) 6 32664883 \n",
"74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n",
"75 0.863203 395 31.936902 0.488386 (10, 4999206) 10 4999206 \n",
"76 0.815908 493 215.211832 0.928361 (6, 32661393) 6 32661393 \n",
"77 0.380747 480 191.682692 0.596527 (6, 32661384) 6 32661384 \n",
"78 0.778990 27 2.581081 0.645061 (6, 32661333) 6 32661333 \n",
"79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n",
"81 0.993934 523 1127.133588 0.582806 (16, 2106849) 16 2106849 \n",
"82 0.995522 517 1391.395753 0.989221 (6, 29942795) 6 29942795 \n",
"83 0.965023 510 2437.399610 0.856497 (6, 29942858) 6 29942858 \n",
"84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n",
"85 0.756602 29 2.614504 0.669605 (3, 75630855) 3 75630855 \n",
"86 0.224100 13 1.728489 0.588009 (3, 75630794) 3 75630794 \n",
"87 0.549532 103 7.149510 0.523889 (3, 49686483) 3 49686483 \n",
"88 0.683612 519 1443.973282 0.499179 (6, 29943463) 6 29943463 \n",
"89 0.758810 35 3.059961 0.520303 (6, 29943667) 6 29943667 \n",
"90 0.858112 523 66.984733 0.899413 (3, 179234284) 3 179234284 \n",
"91 0.844742 513 88.143130 0.874750 (17, 7674888) 17 7674888 \n",
"92 1.000000 517 117.526718 1.000000 (17, 7674256) 17 7674256 \n",
"93 0.493328 512 82.120229 0.692187 (17, 7673704) 17 7673704 \n",
"94 0.403970 55 3.786275 0.490953 (17, 21416556) 17 21416556 \n",
"95 0.666459 523 986.114504 0.500538 (8, 100709671) 8 100709671 \n",
"96 0.951125 0 0.047244 0.509217 (3, 183959847) 3 183959847 \n",
"97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n",
"98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n",
"99 0.870781 495 23.572519 0.462571 (22, 24627926) 22 24627926 \n",
"100 0.754902 0 0.001957 0.500000 (20, 8788776) 20 8788776 \n",
"101 0.289047 205 13.273622 0.551120 (6, 31271875) 6 31271875 \n",
"102 0.318593 524 977.551527 0.504434 (6, 31271839) 6 31271839 \n",
"103 0.953258 522 2469.611111 0.582500 (6, 31356399) 6 31356399 \n",
"\n",
" tcga_wxs_count Gene \n",
"0 371 IDH1 \n",
"1 59 TP53 \n",
"2 38 IDH1 \n",
"3 14 ARHGAP5 \n",
"5 15 IDH2 \n",
"6 15 TP53 \n",
"7 11 CYP11B1 \n",
"8 11 POLA1 \n",
"9 10 GSTM1 \n",
"10 10 TP53 \n",
"11 9 HLA \n",
"13 9 POLA1 \n",
"14 9 VWF \n",
"15 8 TP53 \n",
"16 8 TP53 \n",
"17 8 TP53 \n",
"18 7 VWF \n",
"19 7 HLA \n",
"20 7 MIR6891 \n",
"21 7 CFTR \n",
"22 7 HLA \n",
"23 7 HLA \n",
"24 7 TP53 \n",
"25 7 PRSS1 \n",
"26 6 KMT2C \n",
"27 6 TP53 \n",
"28 6 GOLGA2 \n",
"29 6 PRSS1 \n",
"30 6 EGFR \n",
"31 6 CACNA1C \n",
".. ... ... \n",
"73 3 HLA \n",
"74 3 HLA \n",
"75 3 AKR1C2 \n",
"76 3 HLA \n",
"77 3 HLA \n",
"78 3 HLA \n",
"79 3 STAT6 \n",
"81 3 MIR6511B1 \n",
"82 3 HLA \n",
"83 3 HLA \n",
"84 3 HLA \n",
"85 3 LOC107986102 \n",
"86 3 LOC107986102 \n",
"87 3 MST1 \n",
"88 3 HLA \n",
"89 3 HLA \n",
"90 3 PIK3CA \n",
"91 3 TP53 \n",
"92 3 TP53 \n",
"93 3 TP53 \n",
"94 3 KCNJ12 \n",
"95 3 PABPC1 \n",
"96 3 ABCC5 \n",
"97 3 NF1 \n",
"98 3 PIK3R1 \n",
"99 3 GGT1 \n",
"100 3 PLCB1 \n",
"101 3 HLA \n",
"102 3 HLA \n",
"103 3 MIR6891 \n",
"\n",
"[100 rows x 9 columns]"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top100GeneDf"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [],
"source": [
"# Work on an independent copy of the top-100 site table, so that columns\n",
"# added to geneDf later do not silently appear on top100GeneDf as well.\n",
"# NOTE(review): the original line was a plain alias (geneDf=top100GeneDf);\n",
"# confirm that no later cell relies on both names sharing one object.\n",
"geneDf=top100GeneDf.copy()"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
" Gene | \n",
" Classification | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.998211 | \n",
" 519 | \n",
" 133.822519 | \n",
" 0.984290 | \n",
" (2, 208248388) | \n",
" 2 | \n",
" 208248388 | \n",
" 371 | \n",
" IDH1 | \n",
" Oncogene | \n",
"
\n",
" \n",
" 1 | \n",
" 0.543616 | \n",
" 520 | \n",
" 122.076336 | \n",
" 0.945266 | \n",
" (17, 7673803) | \n",
" 17 | \n",
" 7673803 | \n",
" 59 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 2 | \n",
" 0.865149 | \n",
" 519 | \n",
" 133.375954 | \n",
" 0.896410 | \n",
" (2, 208248389) | \n",
" 2 | \n",
" 208248389 | \n",
" 38 | \n",
" IDH1 | \n",
" Oncogene | \n",
"
\n",
" \n",
" 3 | \n",
" 0.991546 | \n",
" 457 | \n",
" 38.312977 | \n",
" 0.507767 | \n",
" (14, 32092134) | \n",
" 14 | \n",
" 32092134 | \n",
" 14 | \n",
" ARHGAP5 | \n",
" NaN | \n",
"
\n",
" \n",
" 5 | \n",
" 1.000000 | \n",
" 524 | \n",
" 353.841603 | \n",
" 1.000000 | \n",
" (15, 90088606) | \n",
" 15 | \n",
" 90088606 | \n",
" 15 | \n",
" IDH2 | \n",
" Oncogene | \n",
"
\n",
" \n",
" 6 | \n",
" 0.954091 | \n",
" 520 | \n",
" 121.372849 | \n",
" 0.948889 | \n",
" (17, 7673802) | \n",
" 17 | \n",
" 7673802 | \n",
" 15 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 7 | \n",
" 0.738417 | \n",
" 0 | \n",
" 0.001931 | \n",
" 0.500000 | \n",
" (8, 142877758) | \n",
" 8 | \n",
" 142877758 | \n",
" 11 | \n",
" CYP11B1 | \n",
" NaN | \n",
"
\n",
" \n",
" 8 | \n",
" 0.989537 | \n",
" 91 | \n",
" 5.262948 | \n",
" 0.503589 | \n",
" (X, 24789042) | \n",
" X | \n",
" 24789042 | \n",
" 11 | \n",
" POLA1 | \n",
" NaN | \n",
"
\n",
" \n",
" 9 | \n",
" 0.999940 | \n",
" 513 | \n",
" 210.776718 | \n",
" 0.996541 | \n",
" (1, 109690516) | \n",
" 1 | \n",
" 109690516 | \n",
" 10 | \n",
" GSTM1 | \n",
" NaN | \n",
"
\n",
" \n",
" 10 | \n",
" 0.975154 | \n",
" 496 | \n",
" 59.135496 | \n",
" 0.999370 | \n",
" (17, 7674872) | \n",
" 17 | \n",
" 7674872 | \n",
" 10 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 11 | \n",
" 0.684925 | \n",
" 286 | \n",
" 16.242366 | \n",
" 0.613307 | \n",
" (6, 29944050) | \n",
" 6 | \n",
" 29944050 | \n",
" 9 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 13 | \n",
" 0.998226 | \n",
" 524 | \n",
" 1291.374046 | \n",
" 0.694932 | \n",
" (X, 24788994) | \n",
" X | \n",
" 24788994 | \n",
" 9 | \n",
" POLA1 | \n",
" NaN | \n",
"
\n",
" \n",
" 14 | \n",
" 0.994127 | \n",
" 431 | \n",
" 36.320537 | \n",
" 0.621622 | \n",
" (12, 6018369) | \n",
" 12 | \n",
" 6018369 | \n",
" 9 | \n",
" VWF | \n",
" NaN | \n",
"
\n",
" \n",
" 15 | \n",
" 0.895074 | \n",
" 520 | \n",
" 124.636711 | \n",
" 0.929366 | \n",
" (17, 7674220) | \n",
" 17 | \n",
" 7674220 | \n",
" 8 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 16 | \n",
" 0.850000 | \n",
" 510 | \n",
" 91.690840 | \n",
" 0.998394 | \n",
" (17, 7675076) | \n",
" 17 | \n",
" 7675076 | \n",
" 8 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 17 | \n",
" 0.779858 | \n",
" 509 | \n",
" 107.205374 | \n",
" 0.765568 | \n",
" (17, 7675088) | \n",
" 17 | \n",
" 7675088 | \n",
" 8 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 18 | \n",
" 0.924295 | \n",
" 502 | \n",
" 79.051527 | \n",
" 0.615436 | \n",
" (12, 6018901) | \n",
" 12 | \n",
" 6018901 | \n",
" 7 | \n",
" VWF | \n",
" NaN | \n",
"
\n",
" \n",
" 19 | \n",
" 0.967688 | \n",
" 524 | \n",
" 950.933206 | \n",
" 0.671795 | \n",
" (6, 31271836) | \n",
" 6 | \n",
" 31271836 | \n",
" 7 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 20 | \n",
" 0.524973 | \n",
" 502 | \n",
" 1370.792233 | \n",
" 0.456735 | \n",
" (6, 31356729) | \n",
" 6 | \n",
" 31356729 | \n",
" 7 | \n",
" MIR6891 | \n",
" NaN | \n",
"
\n",
" \n",
" 21 | \n",
" 0.966475 | \n",
" 1 | \n",
" 0.392720 | \n",
" 0.500000 | \n",
" (7, 117548682) | \n",
" 7 | \n",
" 117548682 | \n",
" 7 | \n",
" CFTR | \n",
" NaN | \n",
"
\n",
" \n",
" 22 | \n",
" 0.843596 | \n",
" 522 | \n",
" 2902.068702 | \n",
" 0.470996 | \n",
" (6, 29943406) | \n",
" 6 | \n",
" 29943406 | \n",
" 7 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 23 | \n",
" 0.624032 | \n",
" 480 | \n",
" 2319.403475 | \n",
" 0.517632 | \n",
" (6, 29943422) | \n",
" 6 | \n",
" 29943422 | \n",
" 7 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 24 | \n",
" 1.000000 | \n",
" 520 | \n",
" 123.956023 | \n",
" 1.000000 | \n",
" (17, 7674221) | \n",
" 17 | \n",
" 7674221 | \n",
" 7 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 25 | \n",
" 0.976915 | \n",
" 3 | \n",
" 0.319915 | \n",
" 0.498495 | \n",
" (7, 142750675) | \n",
" 7 | \n",
" 142750675 | \n",
" 7 | \n",
" PRSS1 | \n",
" NaN | \n",
"
\n",
" \n",
" 26 | \n",
" 0.881503 | \n",
" 212 | \n",
" 9.790076 | \n",
" 0.500000 | \n",
" (7, 152238825) | \n",
" 7 | \n",
" 152238825 | \n",
" 6 | \n",
" KMT2C | \n",
" NaN | \n",
"
\n",
" \n",
" 27 | \n",
" 0.777019 | \n",
" 518 | \n",
" 121.843511 | \n",
" 0.875674 | \n",
" (17, 7673776) | \n",
" 17 | \n",
" 7673776 | \n",
" 6 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 28 | \n",
" 0.501689 | \n",
" 472 | \n",
" 84.025794 | \n",
" 0.500000 | \n",
" (9, 128257486) | \n",
" 9 | \n",
" 128257486 | \n",
" 6 | \n",
" GOLGA2 | \n",
" NaN | \n",
"
\n",
" \n",
" 29 | \n",
" 0.962142 | \n",
" 4 | \n",
" 0.215953 | \n",
" 0.501053 | \n",
" (7, 142750600) | \n",
" 7 | \n",
" 142750600 | \n",
" 6 | \n",
" PRSS1 | \n",
" NaN | \n",
"
\n",
" \n",
" 30 | \n",
" 0.116652 | \n",
" 481 | \n",
" 300.219466 | \n",
" 0.601301 | \n",
" (7, 55165350) | \n",
" 7 | \n",
" 55165350 | \n",
" 6 | \n",
" EGFR | \n",
" Oncogene | \n",
"
\n",
" \n",
" 31 | \n",
" 0.999499 | \n",
" 8 | \n",
" 2.092742 | \n",
" 0.781609 | \n",
" (12, 2685853) | \n",
" 12 | \n",
" 2685853 | \n",
" 6 | \n",
" CACNA1C | \n",
" NaN | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 73 | \n",
" 0.741291 | \n",
" 506 | \n",
" 315.198473 | \n",
" 0.621487 | \n",
" (6, 32664883) | \n",
" 6 | \n",
" 32664883 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 74 | \n",
" 0.026316 | \n",
" 436 | \n",
" 162.306796 | \n",
" 0.784810 | \n",
" (6, 32664926) | \n",
" 6 | \n",
" 32664926 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 75 | \n",
" 0.863203 | \n",
" 395 | \n",
" 31.936902 | \n",
" 0.488386 | \n",
" (10, 4999206) | \n",
" 10 | \n",
" 4999206 | \n",
" 3 | \n",
" AKR1C2 | \n",
" NaN | \n",
"
\n",
" \n",
" 76 | \n",
" 0.815908 | \n",
" 493 | \n",
" 215.211832 | \n",
" 0.928361 | \n",
" (6, 32661393) | \n",
" 6 | \n",
" 32661393 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 77 | \n",
" 0.380747 | \n",
" 480 | \n",
" 191.682692 | \n",
" 0.596527 | \n",
" (6, 32661384) | \n",
" 6 | \n",
" 32661384 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 78 | \n",
" 0.778990 | \n",
" 27 | \n",
" 2.581081 | \n",
" 0.645061 | \n",
" (6, 32661333) | \n",
" 6 | \n",
" 32661333 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 79 | \n",
" 0.007407 | \n",
" 0 | \n",
" 1.307692 | \n",
" 0.481203 | \n",
" (12, 57099758) | \n",
" 12 | \n",
" 57099758 | \n",
" 3 | \n",
" STAT6 | \n",
" NaN | \n",
"
\n",
" \n",
" 81 | \n",
" 0.993934 | \n",
" 523 | \n",
" 1127.133588 | \n",
" 0.582806 | \n",
" (16, 2106849) | \n",
" 16 | \n",
" 2106849 | \n",
" 3 | \n",
" MIR6511B1 | \n",
" NaN | \n",
"
\n",
" \n",
" 82 | \n",
" 0.995522 | \n",
" 517 | \n",
" 1391.395753 | \n",
" 0.989221 | \n",
" (6, 29942795) | \n",
" 6 | \n",
" 29942795 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 83 | \n",
" 0.965023 | \n",
" 510 | \n",
" 2437.399610 | \n",
" 0.856497 | \n",
" (6, 29942858) | \n",
" 6 | \n",
" 29942858 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 84 | \n",
" 0.061988 | \n",
" 524 | \n",
" 2200.085878 | \n",
" 0.573877 | \n",
" (6, 29942916) | \n",
" 6 | \n",
" 29942916 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 85 | \n",
" 0.756602 | \n",
" 29 | \n",
" 2.614504 | \n",
" 0.669605 | \n",
" (3, 75630855) | \n",
" 3 | \n",
" 75630855 | \n",
" 3 | \n",
" LOC107986102 | \n",
" NaN | \n",
"
\n",
" \n",
" 86 | \n",
" 0.224100 | \n",
" 13 | \n",
" 1.728489 | \n",
" 0.588009 | \n",
" (3, 75630794) | \n",
" 3 | \n",
" 75630794 | \n",
" 3 | \n",
" LOC107986102 | \n",
" NaN | \n",
"
\n",
" \n",
" 87 | \n",
" 0.549532 | \n",
" 103 | \n",
" 7.149510 | \n",
" 0.523889 | \n",
" (3, 49686483) | \n",
" 3 | \n",
" 49686483 | \n",
" 3 | \n",
" MST1 | \n",
" NaN | \n",
"
\n",
" \n",
" 88 | \n",
" 0.683612 | \n",
" 519 | \n",
" 1443.973282 | \n",
" 0.499179 | \n",
" (6, 29943463) | \n",
" 6 | \n",
" 29943463 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 89 | \n",
" 0.758810 | \n",
" 35 | \n",
" 3.059961 | \n",
" 0.520303 | \n",
" (6, 29943667) | \n",
" 6 | \n",
" 29943667 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 90 | \n",
" 0.858112 | \n",
" 523 | \n",
" 66.984733 | \n",
" 0.899413 | \n",
" (3, 179234284) | \n",
" 3 | \n",
" 179234284 | \n",
" 3 | \n",
" PIK3CA | \n",
" Oncogene | \n",
"
\n",
" \n",
" 91 | \n",
" 0.844742 | \n",
" 513 | \n",
" 88.143130 | \n",
" 0.874750 | \n",
" (17, 7674888) | \n",
" 17 | \n",
" 7674888 | \n",
" 3 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 92 | \n",
" 1.000000 | \n",
" 517 | \n",
" 117.526718 | \n",
" 1.000000 | \n",
" (17, 7674256) | \n",
" 17 | \n",
" 7674256 | \n",
" 3 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 93 | \n",
" 0.493328 | \n",
" 512 | \n",
" 82.120229 | \n",
" 0.692187 | \n",
" (17, 7673704) | \n",
" 17 | \n",
" 7673704 | \n",
" 3 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 94 | \n",
" 0.403970 | \n",
" 55 | \n",
" 3.786275 | \n",
" 0.490953 | \n",
" (17, 21416556) | \n",
" 17 | \n",
" 21416556 | \n",
" 3 | \n",
" KCNJ12 | \n",
" NaN | \n",
"
\n",
" \n",
" 95 | \n",
" 0.666459 | \n",
" 523 | \n",
" 986.114504 | \n",
" 0.500538 | \n",
" (8, 100709671) | \n",
" 8 | \n",
" 100709671 | \n",
" 3 | \n",
" PABPC1 | \n",
" NaN | \n",
"
\n",
" \n",
" 96 | \n",
" 0.951125 | \n",
" 0 | \n",
" 0.047244 | \n",
" 0.509217 | \n",
" (3, 183959847) | \n",
" 3 | \n",
" 183959847 | \n",
" 3 | \n",
" ABCC5 | \n",
" NaN | \n",
"
\n",
" \n",
" 97 | \n",
" 0.671406 | \n",
" 501 | \n",
" 94.854127 | \n",
" 0.794175 | \n",
" (17, 31350209) | \n",
" 17 | \n",
" 31350209 | \n",
" 3 | \n",
" NF1 | \n",
" TSG | \n",
"
\n",
" \n",
" 98 | \n",
" 0.530668 | \n",
" 518 | \n",
" 380.994275 | \n",
" 0.738304 | \n",
" (5, 68295269) | \n",
" 5 | \n",
" 68295269 | \n",
" 3 | \n",
" PIK3R1 | \n",
" TSG | \n",
"
\n",
" \n",
" 99 | \n",
" 0.870781 | \n",
" 495 | \n",
" 23.572519 | \n",
" 0.462571 | \n",
" (22, 24627926) | \n",
" 22 | \n",
" 24627926 | \n",
" 3 | \n",
" GGT1 | \n",
" NaN | \n",
"
\n",
" \n",
" 100 | \n",
" 0.754902 | \n",
" 0 | \n",
" 0.001957 | \n",
" 0.500000 | \n",
" (20, 8788776) | \n",
" 20 | \n",
" 8788776 | \n",
" 3 | \n",
" PLCB1 | \n",
" NaN | \n",
"
\n",
" \n",
" 101 | \n",
" 0.289047 | \n",
" 205 | \n",
" 13.273622 | \n",
" 0.551120 | \n",
" (6, 31271875) | \n",
" 6 | \n",
" 31271875 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 102 | \n",
" 0.318593 | \n",
" 524 | \n",
" 977.551527 | \n",
" 0.504434 | \n",
" (6, 31271839) | \n",
" 6 | \n",
" 31271839 | \n",
" 3 | \n",
" HLA | \n",
" NaN | \n",
"
\n",
" \n",
" 103 | \n",
" 0.953258 | \n",
" 522 | \n",
" 2469.611111 | \n",
" 0.582500 | \n",
" (6, 31356399) | \n",
" 6 | \n",
" 31356399 | \n",
" 3 | \n",
" MIR6891 | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
100 rows × 10 columns
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n",
"1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n",
"2 0.865149 519 133.375954 0.896410 (2, 208248389) 2 208248389 \n",
"3 0.991546 457 38.312977 0.507767 (14, 32092134) 14 32092134 \n",
"5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n",
"6 0.954091 520 121.372849 0.948889 (17, 7673802) 17 7673802 \n",
"7 0.738417 0 0.001931 0.500000 (8, 142877758) 8 142877758 \n",
"8 0.989537 91 5.262948 0.503589 (X, 24789042) X 24789042 \n",
"9 0.999940 513 210.776718 0.996541 (1, 109690516) 1 109690516 \n",
"10 0.975154 496 59.135496 0.999370 (17, 7674872) 17 7674872 \n",
"11 0.684925 286 16.242366 0.613307 (6, 29944050) 6 29944050 \n",
"13 0.998226 524 1291.374046 0.694932 (X, 24788994) X 24788994 \n",
"14 0.994127 431 36.320537 0.621622 (12, 6018369) 12 6018369 \n",
"15 0.895074 520 124.636711 0.929366 (17, 7674220) 17 7674220 \n",
"16 0.850000 510 91.690840 0.998394 (17, 7675076) 17 7675076 \n",
"17 0.779858 509 107.205374 0.765568 (17, 7675088) 17 7675088 \n",
"18 0.924295 502 79.051527 0.615436 (12, 6018901) 12 6018901 \n",
"19 0.967688 524 950.933206 0.671795 (6, 31271836) 6 31271836 \n",
"20 0.524973 502 1370.792233 0.456735 (6, 31356729) 6 31356729 \n",
"21 0.966475 1 0.392720 0.500000 (7, 117548682) 7 117548682 \n",
"22 0.843596 522 2902.068702 0.470996 (6, 29943406) 6 29943406 \n",
"23 0.624032 480 2319.403475 0.517632 (6, 29943422) 6 29943422 \n",
"24 1.000000 520 123.956023 1.000000 (17, 7674221) 17 7674221 \n",
"25 0.976915 3 0.319915 0.498495 (7, 142750675) 7 142750675 \n",
"26 0.881503 212 9.790076 0.500000 (7, 152238825) 7 152238825 \n",
"27 0.777019 518 121.843511 0.875674 (17, 7673776) 17 7673776 \n",
"28 0.501689 472 84.025794 0.500000 (9, 128257486) 9 128257486 \n",
"29 0.962142 4 0.215953 0.501053 (7, 142750600) 7 142750600 \n",
"30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n",
"31 0.999499 8 2.092742 0.781609 (12, 2685853) 12 2685853 \n",
".. ... ... ... ... ... .. ... \n",
"73 0.741291 506 315.198473 0.621487 (6, 32664883) 6 32664883 \n",
"74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n",
"75 0.863203 395 31.936902 0.488386 (10, 4999206) 10 4999206 \n",
"76 0.815908 493 215.211832 0.928361 (6, 32661393) 6 32661393 \n",
"77 0.380747 480 191.682692 0.596527 (6, 32661384) 6 32661384 \n",
"78 0.778990 27 2.581081 0.645061 (6, 32661333) 6 32661333 \n",
"79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n",
"81 0.993934 523 1127.133588 0.582806 (16, 2106849) 16 2106849 \n",
"82 0.995522 517 1391.395753 0.989221 (6, 29942795) 6 29942795 \n",
"83 0.965023 510 2437.399610 0.856497 (6, 29942858) 6 29942858 \n",
"84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n",
"85 0.756602 29 2.614504 0.669605 (3, 75630855) 3 75630855 \n",
"86 0.224100 13 1.728489 0.588009 (3, 75630794) 3 75630794 \n",
"87 0.549532 103 7.149510 0.523889 (3, 49686483) 3 49686483 \n",
"88 0.683612 519 1443.973282 0.499179 (6, 29943463) 6 29943463 \n",
"89 0.758810 35 3.059961 0.520303 (6, 29943667) 6 29943667 \n",
"90 0.858112 523 66.984733 0.899413 (3, 179234284) 3 179234284 \n",
"91 0.844742 513 88.143130 0.874750 (17, 7674888) 17 7674888 \n",
"92 1.000000 517 117.526718 1.000000 (17, 7674256) 17 7674256 \n",
"93 0.493328 512 82.120229 0.692187 (17, 7673704) 17 7673704 \n",
"94 0.403970 55 3.786275 0.490953 (17, 21416556) 17 21416556 \n",
"95 0.666459 523 986.114504 0.500538 (8, 100709671) 8 100709671 \n",
"96 0.951125 0 0.047244 0.509217 (3, 183959847) 3 183959847 \n",
"97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n",
"98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n",
"99 0.870781 495 23.572519 0.462571 (22, 24627926) 22 24627926 \n",
"100 0.754902 0 0.001957 0.500000 (20, 8788776) 20 8788776 \n",
"101 0.289047 205 13.273622 0.551120 (6, 31271875) 6 31271875 \n",
"102 0.318593 524 977.551527 0.504434 (6, 31271839) 6 31271839 \n",
"103 0.953258 522 2469.611111 0.582500 (6, 31356399) 6 31356399 \n",
"\n",
" tcga_wxs_count Gene Classification \n",
"0 371 IDH1 Oncogene \n",
"1 59 TP53 TSG \n",
"2 38 IDH1 Oncogene \n",
"3 14 ARHGAP5 NaN \n",
"5 15 IDH2 Oncogene \n",
"6 15 TP53 TSG \n",
"7 11 CYP11B1 NaN \n",
"8 11 POLA1 NaN \n",
"9 10 GSTM1 NaN \n",
"10 10 TP53 TSG \n",
"11 9 HLA NaN \n",
"13 9 POLA1 NaN \n",
"14 9 VWF NaN \n",
"15 8 TP53 TSG \n",
"16 8 TP53 TSG \n",
"17 8 TP53 TSG \n",
"18 7 VWF NaN \n",
"19 7 HLA NaN \n",
"20 7 MIR6891 NaN \n",
"21 7 CFTR NaN \n",
"22 7 HLA NaN \n",
"23 7 HLA NaN \n",
"24 7 TP53 TSG \n",
"25 7 PRSS1 NaN \n",
"26 6 KMT2C NaN \n",
"27 6 TP53 TSG \n",
"28 6 GOLGA2 NaN \n",
"29 6 PRSS1 NaN \n",
"30 6 EGFR Oncogene \n",
"31 6 CACNA1C NaN \n",
".. ... ... ... \n",
"73 3 HLA NaN \n",
"74 3 HLA NaN \n",
"75 3 AKR1C2 NaN \n",
"76 3 HLA NaN \n",
"77 3 HLA NaN \n",
"78 3 HLA NaN \n",
"79 3 STAT6 NaN \n",
"81 3 MIR6511B1 NaN \n",
"82 3 HLA NaN \n",
"83 3 HLA NaN \n",
"84 3 HLA NaN \n",
"85 3 LOC107986102 NaN \n",
"86 3 LOC107986102 NaN \n",
"87 3 MST1 NaN \n",
"88 3 HLA NaN \n",
"89 3 HLA NaN \n",
"90 3 PIK3CA Oncogene \n",
"91 3 TP53 TSG \n",
"92 3 TP53 TSG \n",
"93 3 TP53 TSG \n",
"94 3 KCNJ12 NaN \n",
"95 3 PABPC1 NaN \n",
"96 3 ABCC5 NaN \n",
"97 3 NF1 TSG \n",
"98 3 PIK3R1 TSG \n",
"99 3 GGT1 NaN \n",
"100 3 PLCB1 NaN \n",
"101 3 HLA NaN \n",
"102 3 HLA NaN \n",
"103 3 MIR6891 NaN \n",
"\n",
"[100 rows x 10 columns]"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top100GeneDf"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
"# Read the tab-separated table stored at ./Data/oncogene_ts.tsv; the next cell\n",
"# uses its 'Gene Symbol' and 'Classification*' columns.\n",
"dbsnpFlagDf=pd.read_csv(filepath_or_buffer='./Data/oncogene_ts.tsv',delimiter='\\t')"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"# Lookup Series: index is 'Gene Symbol', values are the 'Classification*' column.\n",
"geneToStatus=dbsnpFlagDf.set_index('Gene Symbol').loc[:,'Classification*']"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the sites whose gene carries a classification label.\n",
"# No cell above this one attaches a 'Classification' column to geneDf, so on a\n",
"# fresh kernel (Restart & Run All) the column is derived here instead of relying\n",
"# on leftover kernel state. When the column already exists it is used as-is.\n",
"# NOTE(review): mapping 'Gene' through geneToStatus is assumed to be how the\n",
"# column was originally produced - confirm against the cell that created it.\n",
"if 'Classification' in geneDf.columns:\n",
"    classifiedGeneDf=geneDf\n",
"else:\n",
"    classifiedGeneDf=geneDf.assign(Classification=geneDf['Gene'].map(geneToStatus))\n",
"withClassificationStatDf=classifiedGeneDf[classifiedGeneDf.Classification.notnull()]"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
" Gene | \n",
" Classification | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.998211 | \n",
" 519 | \n",
" 133.822519 | \n",
" 0.984290 | \n",
" (2, 208248388) | \n",
" 2 | \n",
" 208248388 | \n",
" 371 | \n",
" IDH1 | \n",
" Oncogene | \n",
"
\n",
" \n",
" 1 | \n",
" 0.543616 | \n",
" 520 | \n",
" 122.076336 | \n",
" 0.945266 | \n",
" (17, 7673803) | \n",
" 17 | \n",
" 7673803 | \n",
" 59 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 2 | \n",
" 0.865149 | \n",
" 519 | \n",
" 133.375954 | \n",
" 0.896410 | \n",
" (2, 208248389) | \n",
" 2 | \n",
" 208248389 | \n",
" 38 | \n",
" IDH1 | \n",
" Oncogene | \n",
"
\n",
" \n",
" 5 | \n",
" 1.000000 | \n",
" 524 | \n",
" 353.841603 | \n",
" 1.000000 | \n",
" (15, 90088606) | \n",
" 15 | \n",
" 90088606 | \n",
" 15 | \n",
" IDH2 | \n",
" Oncogene | \n",
"
\n",
" \n",
" 6 | \n",
" 0.954091 | \n",
" 520 | \n",
" 121.372849 | \n",
" 0.948889 | \n",
" (17, 7673802) | \n",
" 17 | \n",
" 7673802 | \n",
" 15 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 10 | \n",
" 0.975154 | \n",
" 496 | \n",
" 59.135496 | \n",
" 0.999370 | \n",
" (17, 7674872) | \n",
" 17 | \n",
" 7674872 | \n",
" 10 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 15 | \n",
" 0.895074 | \n",
" 520 | \n",
" 124.636711 | \n",
" 0.929366 | \n",
" (17, 7674220) | \n",
" 17 | \n",
" 7674220 | \n",
" 8 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 16 | \n",
" 0.850000 | \n",
" 510 | \n",
" 91.690840 | \n",
" 0.998394 | \n",
" (17, 7675076) | \n",
" 17 | \n",
" 7675076 | \n",
" 8 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 17 | \n",
" 0.779858 | \n",
" 509 | \n",
" 107.205374 | \n",
" 0.765568 | \n",
" (17, 7675088) | \n",
" 17 | \n",
" 7675088 | \n",
" 8 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 24 | \n",
" 1.000000 | \n",
" 520 | \n",
" 123.956023 | \n",
" 1.000000 | \n",
" (17, 7674221) | \n",
" 17 | \n",
" 7674221 | \n",
" 7 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 27 | \n",
" 0.777019 | \n",
" 518 | \n",
" 121.843511 | \n",
" 0.875674 | \n",
" (17, 7673776) | \n",
" 17 | \n",
" 7673776 | \n",
" 6 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 30 | \n",
" 0.116652 | \n",
" 481 | \n",
" 300.219466 | \n",
" 0.601301 | \n",
" (7, 55165350) | \n",
" 7 | \n",
" 55165350 | \n",
" 6 | \n",
" EGFR | \n",
" Oncogene | \n",
"
\n",
" \n",
" 34 | \n",
" 0.831944 | \n",
" 519 | \n",
" 120.805344 | \n",
" 0.871324 | \n",
" (17, 7674230) | \n",
" 17 | \n",
" 7674230 | \n",
" 5 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 35 | \n",
" 0.795725 | \n",
" 513 | \n",
" 99.967557 | \n",
" 0.869141 | \n",
" (17, 7674945) | \n",
" 17 | \n",
" 7674945 | \n",
" 5 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 37 | \n",
" 0.718190 | \n",
" 464 | \n",
" 182.276718 | \n",
" 0.845446 | \n",
" (7, 55154129) | \n",
" 7 | \n",
" 55154129 | \n",
" 5 | \n",
" EGFR | \n",
" Oncogene | \n",
"
\n",
" \n",
" 53 | \n",
" 0.504771 | \n",
" 237 | \n",
" 10.774809 | \n",
" 0.500000 | \n",
" (3, 179199690) | \n",
" 3 | \n",
" 179199690 | \n",
" 4 | \n",
" PIK3CA | \n",
" Oncogene | \n",
"
\n",
" \n",
" 67 | \n",
" 1.000000 | \n",
" 516 | \n",
" 115.853053 | \n",
" 1.000000 | \n",
" (17, 7676044) | \n",
" 17 | \n",
" 7676044 | \n",
" 4 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 90 | \n",
" 0.858112 | \n",
" 523 | \n",
" 66.984733 | \n",
" 0.899413 | \n",
" (3, 179234284) | \n",
" 3 | \n",
" 179234284 | \n",
" 3 | \n",
" PIK3CA | \n",
" Oncogene | \n",
"
\n",
" \n",
" 91 | \n",
" 0.844742 | \n",
" 513 | \n",
" 88.143130 | \n",
" 0.874750 | \n",
" (17, 7674888) | \n",
" 17 | \n",
" 7674888 | \n",
" 3 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 92 | \n",
" 1.000000 | \n",
" 517 | \n",
" 117.526718 | \n",
" 1.000000 | \n",
" (17, 7674256) | \n",
" 17 | \n",
" 7674256 | \n",
" 3 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 93 | \n",
" 0.493328 | \n",
" 512 | \n",
" 82.120229 | \n",
" 0.692187 | \n",
" (17, 7673704) | \n",
" 17 | \n",
" 7673704 | \n",
" 3 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 97 | \n",
" 0.671406 | \n",
" 501 | \n",
" 94.854127 | \n",
" 0.794175 | \n",
" (17, 31350209) | \n",
" 17 | \n",
" 31350209 | \n",
" 3 | \n",
" NF1 | \n",
" TSG | \n",
"
\n",
" \n",
" 98 | \n",
" 0.530668 | \n",
" 518 | \n",
" 380.994275 | \n",
" 0.738304 | \n",
" (5, 68295269) | \n",
" 5 | \n",
" 68295269 | \n",
" 3 | \n",
" PIK3R1 | \n",
" TSG | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n",
"1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n",
"2 0.865149 519 133.375954 0.896410 (2, 208248389) 2 208248389 \n",
"5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n",
"6 0.954091 520 121.372849 0.948889 (17, 7673802) 17 7673802 \n",
"10 0.975154 496 59.135496 0.999370 (17, 7674872) 17 7674872 \n",
"15 0.895074 520 124.636711 0.929366 (17, 7674220) 17 7674220 \n",
"16 0.850000 510 91.690840 0.998394 (17, 7675076) 17 7675076 \n",
"17 0.779858 509 107.205374 0.765568 (17, 7675088) 17 7675088 \n",
"24 1.000000 520 123.956023 1.000000 (17, 7674221) 17 7674221 \n",
"27 0.777019 518 121.843511 0.875674 (17, 7673776) 17 7673776 \n",
"30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n",
"34 0.831944 519 120.805344 0.871324 (17, 7674230) 17 7674230 \n",
"35 0.795725 513 99.967557 0.869141 (17, 7674945) 17 7674945 \n",
"37 0.718190 464 182.276718 0.845446 (7, 55154129) 7 55154129 \n",
"53 0.504771 237 10.774809 0.500000 (3, 179199690) 3 179199690 \n",
"67 1.000000 516 115.853053 1.000000 (17, 7676044) 17 7676044 \n",
"90 0.858112 523 66.984733 0.899413 (3, 179234284) 3 179234284 \n",
"91 0.844742 513 88.143130 0.874750 (17, 7674888) 17 7674888 \n",
"92 1.000000 517 117.526718 1.000000 (17, 7674256) 17 7674256 \n",
"93 0.493328 512 82.120229 0.692187 (17, 7673704) 17 7673704 \n",
"97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n",
"98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n",
"\n",
" tcga_wxs_count Gene Classification \n",
"0 371 IDH1 Oncogene \n",
"1 59 TP53 TSG \n",
"2 38 IDH1 Oncogene \n",
"5 15 IDH2 Oncogene \n",
"6 15 TP53 TSG \n",
"10 10 TP53 TSG \n",
"15 8 TP53 TSG \n",
"16 8 TP53 TSG \n",
"17 8 TP53 TSG \n",
"24 7 TP53 TSG \n",
"27 6 TP53 TSG \n",
"30 6 EGFR Oncogene \n",
"34 5 TP53 TSG \n",
"35 5 TP53 TSG \n",
"37 5 EGFR Oncogene \n",
"53 4 PIK3CA Oncogene \n",
"67 4 TP53 TSG \n",
"90 3 PIK3CA Oncogene \n",
"91 3 TP53 TSG \n",
"92 3 TP53 TSG \n",
"93 3 TP53 TSG \n",
"97 3 NF1 TSG \n",
"98 3 PIK3R1 TSG "
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"withClassificationStatDf"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 0.998211\n",
"2 0.865149\n",
"5 1.000000\n",
"30 0.116652\n",
"37 0.718190\n",
"53 0.504771\n",
"90 0.858112\n",
"Name: auprc, dtype: float64"
]
},
"execution_count": 136,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"g=withClassificationStatDf.groupby('Classification')['auprc']\n",
"\n",
"g.get_group('Oncogene')"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 131,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFH5JREFUeJzt3XuUXeV93vHvw8iAMJcESSHOgBCp5MQKdoAI146bGhzhJVgJlAbHJk4htmOSNgjVcd1lOy6hJPFKSNpUYBwMNEakXib4UkfLVbgYm+C2hiIu5mYTTwgXCWqETLlYXCzx6x9nz2YYRtIR1tY5mvl+1pq1Zr/7PXv/ZnQ0z3n35d2pKiRJAthj0AVIkoaHoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqTWrEEXsKPmzp1bCxYsGHQZkrRbueWWWx6rqnnb67fbhcKCBQtYu3btoMuQpN1Kkgf66efhI0lSy1CQJLUMBUlSy1CQJLUMBUlSq7NQSPKXSR5NctdW1ifJ+UnGktyR5KiuapEk9afLkcJlwLJtrD8eWNR8nQH8RYe1SJL60Nl9ClV1Q5IF2+hyEnB59Z4HemOSH0nymqp6pKuahsUFF1zA2NjYoMtg/fr1AIyOjg60joULF7J8+fKB1qAXDcP7c1jemzDz3p+DvHltFHhowvK6pu1loZDkDHqjCebPn79LipsJnnnmmUGXIE3J9+bgDDIUMkVbTdWxqi4GLgZYsmTJlH12J8PyqWPFihUArFy5csCVaJgMw/vT9+bgDPLqo3XAIROWDwYeHlAtkiQGGwqrgdOaq5DeBDwxE84nSNIw6+zwUZLPAscAc5OsA34feBVAVV0ErAFOAMaATcB7uqpFktSfLq8+OnU76wv4na72L0nacd7RLElqGQqSpNZu95AdaboahpvGhsX472H80tSZblfeQGcoSENibGyM79x9G/P33TLoUgZuzx/0DmI894BPWXzw6ZFduj9DQRoi8/fdwkePenLQZWiIfPzW/Xfp/jynIElqGQqSpJahIElqGQqSpNaMO9HsZX8v8rK/l5pp8+ZLU5lxoTA2Nsbtd32LLfscOOhSBm6P53uzkN9y33cHXMngjWz63qBLkIbCjAsFgC37HMgzP33CoMvQEJn97TWDLkEaCp5TkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1ZtwsqevXr2dk0xPOiqmXGNm0kfXrNw+6DGngHClIklozbqQwOjrK/31uls9T0EvM/vYaRkcPGnQZ0sA5UpAktWbcSEEaVuvXr+f7T43w8Vv3H3QpGiIPPDXCq9ev32X7c6QgSWo5UpCGxOjoKM9tfoSPHvXkoEvREPn4rfuz1+joLtufIwVJUstQkCS1DAVJUqvTUEiyLMm9ScaSfHiK9fOTfC3JbUnuSOLNA5I0QJ2FQpIR4ELgeGAxcGqSxZO6fQy4sqqOBN4FfLKreiRJ29flSOGNwFhV3VdVzwNXACdN6lPA+EXZBwAPd1iPJGk7urwkdRR4aMLyOuCfTupzDnBNkuXAq4GlHdYjSdqOLkcKmaKtJi2fClxWVQcDJwB/leRlNSU5I8naJGs3bNjQQamSJOg2FNYBh0xYPpiXHx56H3AlQFV9A9gbmDt5Q1V1cVUtqaol8+bN66hcSVKXoXAzsCjJYUn2pHciefWkPg8CvwiQ5HX0QsGhgCQNSGehUFWbgTOBq4Fv0bvK6O4k5yY5sen2QeD9Sb4JfBb4jaqafIhJkrSLdDr3UVWtAdZMajt7wvf3AG/psgZJUv+8o1mS1DIUJEktQ0GS1DIUJEktQ0GS1PLJa9IQefBpn9EM8N1Nvc+rB+3zwoArGbwHnx5h0S7cn6EgDYmFCxcOuoSh8fzYGAB7HervZBG79r1hKEhDYvny5YMuYW
isWLECgJUrVw64kpnHcwqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqzepy40mWASuBEeDSqvrjKfr8KnAOUMA3q+rXuqwJYGTT95j97TVd72bo7fHskwC8sPf+A65k8EY2fQ84aNBlSAPXWSgkGQEuBI4D1gE3J1ldVfdM6LMI+Ajwlqp6PMmPdVXPuIULF3a9i93G2NhTACz8Sf8YwkG+NyT6DIUkHwfOq6r/1yz/KPDBqvrYNl72RmCsqu5rXnMFcBJwz4Q+7wcurKrHAarq0R3/EXbM8uXLu97FbmPFihUArFy5csCVSBoW/Z5TOH48EACaP+InbOc1o8BDE5bXNW0TvRZ4bZL/leTG5nCTJGlA+j18NJJkr6p6DiDJbGCv7bwmU7TVFPtfBBwDHAx8PcnhEwOo2d8ZwBkA8+fP77NkSdKO6nek8N+A65K8L8l7gWuBVdt5zTrgkAnLBwMPT9Hnb6rqB1X1j8C99ELiJarq4qpaUlVL5s2b12fJkqQd1VcoVNV5wB8CrwN+BviDpm1bbgYWJTksyZ7Au4DVk/p8CTgWIMlceoeT7uu/fEnSzrTdw0fNVURXV9VS4Kp+N1xVm5OcCVxN75LUv6yqu5OcC6ytqtXNurcnuQfYAnyoqja+kh9EkvTD224oVNWWJJuSHFBVT+zIxqtqDbBmUtvZE74v4HebL0nSgPV7ovlZ4M4k1wLfH2+sqrM6qUqSNBD9hsL/aL4kSdNYX6FQVauak8U/Te+y0nur6vlOK5Mk7XL93tF8AvAp4B/o3X9wWJLfqqq/7bI4SdKu1e/ho/8MHFtVYwBJ/gm9w0mGgiRNI/3evPboeCA07gM6n6dIkrRr9TtSuDvJGuBKeucU3kFv1tN/CVBVX+yoPknSLtRvKOwNfBd4a7O8ATgQ+GV6IWEoSNI00O/VR+/puhBJ0uD1e/XRp3n5DKdU1Xt3ekWSpIHp9/DRlyd8vzdwMi+f8VSStJvr9/DRFyYuJ/ks8JVOKpIkDUy/l6ROtgjwaTeSNM30e07hKV48p1D0rkT6910VJUkajH4PH+2X5EB6I4S9x5s7q0qSNBD9jhR+E1hB75GatwNvAr4BvK270qavCy64gLGxse137Nh4DStWrBhoHQsXLmT58uUDrUFST7/nFFYARwMPVNWxwJH0bmDTbmz27NnMnj170GVIGiJ9P2Snqp5NQpK9qurbSX6q08qmMT8VSxpW/YbCuiQ/AnwJuDbJ43ifgjQtDcPhzWE5tAkz7/BmvyeaT26+PSfJ14ADgKs6q0rSjOZhzcHpd6TQqqq/66IQScNhJn0q1su90pvXJEnTkKEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKkVqehkGRZknuTjCX58Db6nZKkkizpsh5J0rZ1FgpJRoALgeOBxcCpSRZP0W8/4Czgpq5qkST1p8uRwhuBsaq6r6qeB64ATpqi3x8A5wHPdliLJKkPXYbCKPDQhOV1TVsryZHAIVX15Q7rkCT1qctQyBRt1a5M9gD+HPjgdjeUnJFkbZK1GzZs2IklSpIm6jIU1gGHTFg+GHh4wvJ+wOHA9UnuB94ErJ7qZHNVXVxVS6pqybx58zosWZJmti5D4WZgUZLDkuwJvAtYPb6yqp6oqrlVtaCqFgA3AidW1doOa5IkbUNnoVBVm4EzgauBbwFXVtXdSc5NcmJX+5UkvXKzutx4Va0B1kxqO3srfY/pshZJ0vZ5R7MkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJaho
IkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJanYZCkmVJ7k0yluTDU6z/3ST3JLkjyXVJDu2yHknStnUWCklGgAuB44HFwKlJFk/qdhuwpKreAHweOK+reiRJ29flSOGNwFhV3VdVzwNXACdN7FBVX6uqTc3ijcDBHdYjSdqOLkNhFHhowvK6pm1r3gf87VQrkpyRZG2StRs2bNiJJUqSJuoyFDJFW03ZMfl1YAnwp1Otr6qLq2pJVS2ZN2/eTixRkjTRrA63vQ44ZMLywcDDkzslWQr8HvDWqnquw3okSdvR5UjhZmBRksOS7Am8C1g9sUOSI4FPASdW1aMd1iJJ6kNnoVBVm4EzgauBbwFXVtXdSc5NcmLT7U+BfYHPJbk9yeqtbE6StAt0efiIqloDrJnUdvaE75d2uX9J0o7xjmZJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQmME2btzIWWedxcaNGwddiqQhYSjMYKtWreLOO+/k8ssvH3QpkoaEoTBDbdy4kauuuoqq4qqrrnK0IAkwFGasVatW8cILLwCwZcsWRwuSAENhxvrKV77C5s2bAdi8eTPXXnvtgCuSNAwMhRlq6dKlzJrVmzl91qxZHHfccQOuSNIwMBRmqNNPP5099uj984+MjHDaaacNuCJJw8BQmKHmzJnDsmXLSMKyZcuYM2fOoEuSNAQ6ffKahtvpp5/O/fff7yhBUstQmMHmzJnD+eefP+gyJA0RDx9JklqGgiSpZShIklqGgiSplaoadA07JMkG4IFB1zGNzAUeG3QR0hR8b+5ch1bVvO112u1CQTtXkrVVtWTQdUiT+d4cDA8fSZJahoIkqWUo6OJBFyBthe/NAfCcgiSp5UhBktRy7qMhl+Rg4EJgMb0Q/zLwoap6fqCFSa9QkjnAdc3ijwNbgA3N8n8HfrVpewH4raq6Kcks4FzgHcD3m76fq6o/2mWFzxCOFIZYkgBfBL5UVYuA1wL7Av5H0G6rqjZW1RFVdQRwEfDnzff/GlgGHFVVbwCWAg81L/tD4CeA1zd9fwF41a6vfvozFIbb24Bnq+rTAFW1BfgA8N4k/ybJF5NcleQ7Sc4bf1GSZUluTfLNJNc1bQcm+VKSO5LcmOQNTfu8JNc2/T+V5IEkc5t1v57k/yS5vVk30rQ/neSPmu3fmOSgCdv6QpKbm6+37NLflnZ3rwEeq6rnAKrqsap6OMk+wPuB5VX1bLPuqao6Z3ClTl+GwnD7GeCWiQ1V9STwIL1Df0cA7wReD7wzySFJ5gGXAL9SVT9Lb7gN8B+B25pPYB8FLm/afx/4alUdRW/oPh8gyeuabb+l+WS2BXh385pXAzc227+B3n9YgJX0PvUdDfwKcOnO+kVoRrgGOCTJ3yf5ZJK3Nu0LgQer6qkB1jZjeE5huAWY6vKw8fbrquoJgCT3AIcCPwrcUFX/CFBV32te88/o/aGmqr6aZE6SA5r2k5v2q5I83vT/ReDngJt7R7GYDTzarHue3rkN6IXW+AOelwKLm/4A+yfZz//M6kdVPZ3k5+gdGjoW+OskHwZundgvyXuAFcAc4Oer6qGXbUyvmKEw3O6m+UM+Lsn+wCH0Prk/N2HVFnr/ntsKkslqK+3j/VdV1UemWPeDevFa5vH9Qm/k+eaqemYr25S2qTlEej1wfZI7gdOBK4H54x8wmsOpn05yFzAyuGqnJw8fDbfrgH2SnAbQHNP/T8BlwKatvOYbwFuTHNa85sCm/Qaawz9JjqF37PZJ4H/Su9qDJG+nN9IY3/cpSX5sfDtJDt1OvdcAZ44vJDmi3x9USvJTSRZNaDoCeKCqNgH/FfhEkr2bviPAngMoc9ozFIZY82n8ZOAdSb4D/D3wLL1zAlt7zQbgDO
CLSb4J/HWz6hxgSZI7gD+m9wkMeuca3p7kVuB44BHgqaq6B/gYcE3zmmvpnQjclrPG99EczvrtHfyRNbPtC6xKck/znltM730L8Hv03pt3JbkN+DqwCnh4EIVOZ97RPMMl2QvYUlWbk7wZ+IvmxLKkGchzCpoPXJlkD3onkN+/nf6SpjFHCpKklucUJEktQ0GS1DIUJEktQ0HTSpIfT3JFkn9oLm1ck+S1zY1OO2sf5yZZ2nz/C0nubuaHGk3y+Ve4zd9I8hMTli9Nsnhn1Sz1yxPNmjaaWWX/N707sS9q2o4A9qN3qe3hHezzIuCm8UkLf4jtXA/8u6pau1MKk14hRwqaTo6lNwXHReMNVXU7L06/TJIFSb7ezAp7a5Kfb9pfk+SG5hP/Xc0IYCTJZc3ynUk+0PS9LMkpSX6T3t3gZyf5TLPtu5o+I0n+rHndHUmWN+1nNzPI3pXk4vScAiwBPtPsf3aS65MsaV5zarOdu5L8yYSfZcrZaqUfhqGg6eRwJs0qO4VHgeOaWWHfCZzftP8acHVz497PArfTm2ZhtKoOr6rXAy8ZDVTVpcBqeg89ejcvdQZwGHBkMzPtZ5r2T1TV0c2oZTbwS1X1eWAt8O7mOQPt3FHNIaU/oTeN+hHA0Un+RbN6a7PVSq+YoaCZ5lXAJc1ka5+jN5UCwM3Ae5KcQ+9BLk8B9wE/meSCJMuAJ3dgP0uBi6pqM7xkttpjk9zU7P9t9KZH35ajgeurakOzrc8A/7xZN3m22gU7UJ80JUNB08nd9Kb73pYPAN+lNxpYQjOpWlXdQO+P7Xrgr5KcVlWPN/2uB36HHXs+xMtmq20mc/skcEoz8rgE2LuP7WzN1marlV4xQ0HTyVeBvZK0h1GSHE3vORPjDgAeqaoXgH9FM/VyMwPso1V1Cb0ZOY9K7wl0e1TVF4D/ABy1A7VcA/x2es8WHp+tdjwAHkuyL3DKhP5P0TshPtlN9Ga9ndvMDHoq8Hc7UIe0Q/xkoWmjqirJycB/aR7O8ixwP/BvJ3T7JPCFJO8AvsaLD4E/BvhQkh8ATwOnAaP05u0f//A01bMltuZSes/UvqPZ5iVV9YkklwB3NnXdPKH/ZcBFSZ4B3jzhZ3okyUeaWgOsqaq/2YE6pB3iJamSpJaHjyRJLUNBktQyFCRJLUNBktQyFCRJLUNBktQyFCRJLUNBktT6/0JWkTzJE9KoAAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.boxplot(data=withClassificationStatDf,x='Classification',y='auprc')"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/cellar/users/btsui/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: FutureWarning: \n",
"Passing list-likes to .loc or [] with any missing label will raise\n",
"KeyError in the future, you can use .reindex() as an alternative.\n",
"\n",
"See the documentation here:\n",
"https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"geneDf['Classification']=geneToStatus.loc[geneDf.Gene].values"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [],
"source": [
"#top100GeneDf"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
" Gene | \n",
" Classification | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.998211 | \n",
" 519 | \n",
" 133.822519 | \n",
" 0.984290 | \n",
" (2, 208248388) | \n",
" 2 | \n",
" 208248388 | \n",
" 371 | \n",
" IDH1 | \n",
" Oncogene | \n",
"
\n",
" \n",
" 1 | \n",
" 0.543616 | \n",
" 520 | \n",
" 122.076336 | \n",
" 0.945266 | \n",
" (17, 7673803) | \n",
" 17 | \n",
" 7673803 | \n",
" 59 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 5 | \n",
" 1.000000 | \n",
" 524 | \n",
" 353.841603 | \n",
" 1.000000 | \n",
" (15, 90088606) | \n",
" 15 | \n",
" 90088606 | \n",
" 15 | \n",
" IDH2 | \n",
" Oncogene | \n",
"
\n",
" \n",
" 30 | \n",
" 0.116652 | \n",
" 481 | \n",
" 300.219466 | \n",
" 0.601301 | \n",
" (7, 55165350) | \n",
" 7 | \n",
" 55165350 | \n",
" 6 | \n",
" EGFR | \n",
" Oncogene | \n",
"
\n",
" \n",
" 53 | \n",
" 0.504771 | \n",
" 237 | \n",
" 10.774809 | \n",
" 0.500000 | \n",
" (3, 179199690) | \n",
" 3 | \n",
" 179199690 | \n",
" 4 | \n",
" PIK3CA | \n",
" Oncogene | \n",
"
\n",
" \n",
" 97 | \n",
" 0.671406 | \n",
" 501 | \n",
" 94.854127 | \n",
" 0.794175 | \n",
" (17, 31350209) | \n",
" 17 | \n",
" 31350209 | \n",
" 3 | \n",
" NF1 | \n",
" TSG | \n",
"
\n",
" \n",
" 98 | \n",
" 0.530668 | \n",
" 518 | \n",
" 380.994275 | \n",
" 0.738304 | \n",
" (5, 68295269) | \n",
" 5 | \n",
" 68295269 | \n",
" 3 | \n",
" PIK3R1 | \n",
" TSG | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n",
"1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n",
"5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n",
"30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n",
"53 0.504771 237 10.774809 0.500000 (3, 179199690) 3 179199690 \n",
"97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n",
"98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n",
"\n",
" tcga_wxs_count Gene Classification \n",
"0 371 IDH1 Oncogene \n",
"1 59 TP53 TSG \n",
"5 15 IDH2 Oncogene \n",
"30 6 EGFR Oncogene \n",
"53 4 PIK3CA Oncogene \n",
"97 3 NF1 TSG \n",
"98 3 PIK3R1 TSG "
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"geneDf.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
" Gene | \n",
" Classification | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.998211 | \n",
" 519 | \n",
" 133.822519 | \n",
" 0.984290 | \n",
" (2, 208248388) | \n",
" 2 | \n",
" 208248388 | \n",
" 371 | \n",
" IDH1 | \n",
" Oncogene | \n",
"
\n",
" \n",
" 1 | \n",
" 0.543616 | \n",
" 520 | \n",
" 122.076336 | \n",
" 0.945266 | \n",
" (17, 7673803) | \n",
" 17 | \n",
" 7673803 | \n",
" 59 | \n",
" TP53 | \n",
" TSG | \n",
"
\n",
" \n",
" 5 | \n",
" 1.000000 | \n",
" 524 | \n",
" 353.841603 | \n",
" 1.000000 | \n",
" (15, 90088606) | \n",
" 15 | \n",
" 90088606 | \n",
" 15 | \n",
" IDH2 | \n",
" Oncogene | \n",
"
\n",
" \n",
" 30 | \n",
" 0.116652 | \n",
" 481 | \n",
" 300.219466 | \n",
" 0.601301 | \n",
" (7, 55165350) | \n",
" 7 | \n",
" 55165350 | \n",
" 6 | \n",
" EGFR | \n",
" Oncogene | \n",
"
\n",
" \n",
" 53 | \n",
" 0.504771 | \n",
" 237 | \n",
" 10.774809 | \n",
" 0.500000 | \n",
" (3, 179199690) | \n",
" 3 | \n",
" 179199690 | \n",
" 4 | \n",
" PIK3CA | \n",
" Oncogene | \n",
"
\n",
" \n",
" 97 | \n",
" 0.671406 | \n",
" 501 | \n",
" 94.854127 | \n",
" 0.794175 | \n",
" (17, 31350209) | \n",
" 17 | \n",
" 31350209 | \n",
" 3 | \n",
" NF1 | \n",
" TSG | \n",
"
\n",
" \n",
" 98 | \n",
" 0.530668 | \n",
" 518 | \n",
" 380.994275 | \n",
" 0.738304 | \n",
" (5, 68295269) | \n",
" 5 | \n",
" 68295269 | \n",
" 3 | \n",
" PIK3R1 | \n",
" TSG | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n",
"1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n",
"5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n",
"30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n",
"53 0.504771 237 10.774809 0.500000 (3, 179199690) 3 179199690 \n",
"97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n",
"98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n",
"\n",
" tcga_wxs_count Gene Classification \n",
"0 371 IDH1 Oncogene \n",
"1 59 TP53 TSG \n",
"5 15 IDH2 Oncogene \n",
"30 6 EGFR Oncogene \n",
"53 4 PIK3CA Oncogene \n",
"97 3 NF1 TSG \n",
"98 3 PIK3R1 TSG "
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"geneDf.dropna()"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADdVJREFUeJzt3X+MZfVZx/H3Y7eNhKlA3TJZF2RqQkk3HW27k4bExN6RtCIkpbWpgWhllTrGtGjMarLqHyUa4/4hNjE2MdQSsEmZ1EYFAUVcGTca2jgrlF1KEMS17rKBUmDTQaIuefxjLsm67Ow995577p15fL+Sm7nnx5zv8+yd+eTM995zNjITSdLW913TLkCSNB4GuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhHbJjnY9u3bc25ubpJDdu6VV17h/PPPn3YZY2VPW0fFvir2BO36OnTo0AuZ+fZB+0000Ofm5lhdXZ3kkJ1bWVmh1+tNu4yxsqeto2JfFXuCdn1FxL832c8pF0kqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqYqJXim5Vc/vu23Db3vlT7DnH9jaO7r+2k+NKqskzdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIGBnpEXBoRD0XEExHxeET8cn/92yLiwYh4qv/1ou7LlSRtpMkZ+ilgb2a+C7gS+FRE7AL2AQcy83LgQH9ZkjQlAwM9M09k5j/3n38HeALYCVwH3Nnf7U7gI10VKUkabKg59IiYA94LfA2YzcwTsB76wMXjLk6S1FxkZrMdI2aAvwd+JzP/LCJezswLT9v+Uma+YR49IpaAJYDZ2dndy8vL46l8gg4fP7nhttnz4LlXuxl3fucF3Rx4gLW1NWZmZqYydlcq9gQ1+6rYE7Tra3Fx8VBmLgzar1GgR8SbgXuBBzLz9/vrngR6mXkiInYAK5l5xbmOs7CwkKurq40a2Ezm9t234ba986e49fC2TsY9uv/aTo47yMrKCr1ebypjd6ViT1Czr4o9Qbu+IqJRoDf5lEsAXwCeeD3M++4Bbuw/vxG4e5RCJUnj0eTU8oeBTwCHI+LR/rrfAPYDX46Im4BvAh/vpkRJUhMDAz0z/wGIDTZfNd5yJEmj8kpRSSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgYGekTcHhHPR8SR09bdEhHHI+LR/uOabsuUJA3S5Az9DuDqs6z/bGa+p/+4f7xlSZKGNTDQM/Mg8OIEapEktdBmDv3TEfFYf0rmorFVJEkaSWTm4J0i5oB7M/Pd/eVZ4AUggd8GdmTmz23wvUvAEsDs7Ozu5eXlsRQ+SYePn9xw2+x58Nyr3Yw7v/OCbg48wNraGjMzM1MZuysVe4KafVXsCdr1tbi4eCgzFwbtN1KgN912poWFhVxdXR043mYzt+++DbftnT/FrYe3dTLu0f3XdnLcQVZWVuj1elMZuysVe4KafVXsCdr1FRGNAn2kKZeI2HHa4keBIxvtK0majIGnlhFxF9ADtkfEMeAzQC8i3sP6lMtR4Bc6rFGS1MDAQM/MG86y+gsd1CJJasErRSWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCA
Ndkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkooYGOgRcXtEPB8RR05b97aIeDAinup/vajbMiVJgzQ5Q78DuPqMdfuAA5l5OXCgvyxJmqKBgZ6ZB4EXz1h9HXBn//mdwEfGXJckaUijzqHPZuYJgP7Xi8dXkiRpFJGZg3eKmAPuzcx395dfzswLT9v+UmaedR49IpaAJYDZ2dndy8vLYyh7sg4fP7nhttnz4LlXuxl3fucF3Rx4gLW1NWZmZqYydlcq9gQ1+6rYE7Tra3Fx8VBmLgzab9tIR4fnImJHZp6IiB3A8xvtmJm3AbcBLCwsZK/XG3HI6dmz774Nt+2dP8Wth0f9Zzy3oz/V6+S4g6ysrLAVX6dzqdgT1OyrYk8wmb5GnXK5B7ix//xG4O7xlCNJGlWTjy3eBTwMXBERxyLiJmA/8MGIeAr4YH9ZkjRFA+cKMvOGDTZdNeZaJEkteKWoJBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEdvafHNEHAW+A7wGnMrMhXEUJUkaXqtA71vMzBfGcBxJUgtOuUhSEW0DPYG/iYhDEbE0joIkSaOJzBz9myO+LzOfjYiLgQeBmzPz4Bn7LAFLALOzs7uXl5dHGuvw8ZMj19ml2fPguVe7Ofb8zgu6OfAAa2trzMzMTGXsrlTsCWr2VbEnaNfX4uLioSbvUbYK9P9zoIhbgLXM/L2N9llYWMjV1dWRjj+3774RK+vW3vlT3Hp4HG9FvNHR/dd2ctxBVlZW6PV6Uxm7KxV7gpp9VewJ2vUVEY0CfeQpl4g4PyLe+vpz4EPAkVGPJ0lqp82p5Szw5xHx+nG+lJl/PZaqJElDGznQM/MZ4IfGWIskqQU/tihJRRjoklSEgS5JRRjoklRENx+glqRNaJrXs9xx9fmdj+EZuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhFeWKQ3OHz8JHumdAHGtP5TD6kCz9AlqQgDXZKKMNAlqQgDXZKKMNAlqQgDXZKKMNAlqQgDXZKK8MIiacra/C86e+dPjXwRmBdx1eMZuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhFeWLSJtbngpI2981MZFuiu5zYX4EhbhWfoklSEgS5JRRjoklSEgS5JRbQK9Ii4OiKejIinI2LfuIqSJA1v5ECPiDcBnwN+HNgF3BARu8ZVmCRpOG3O0N8PPJ2Zz2TmfwPLwHXjKUuSNKw2gb4T+I/Tlo/110mSpiAyc7RvjPg48GOZ+cn+8ieA92fmzWfstwQs9RevAJ4cvdxNaTvwwrSLGDN72joq9lWxJ2jX12WZ+fZBO7W5UvQYcOlpy5cAz565U2beBtzWYpxNLSJWM3Nh2nWMkz1tHRX7qtgTTKavNlMu/wRcHhHviIi3ANcD94ynLEnSsEY+Q8/MUxHxaeAB4E3A7Zn5+NgqkyQNpdXNuTLzfuD+MdWyVVWcTrKnraNiXxV7ggn0NfKbopKkzcVL/yWpCAO9gUG3OIiIPRHxrYh4tP/45DTqHFaTWzdExE9GxDci4vGI+NKkaxxWg9fqs6e9Tv8SES9Po85hNejr+yPioYh4JCIei4hrplHnMBr0dFlEHOj3sxIRl0yjzmFExO0R8XxEHNlge0TEH/R7fiwi3jfWAjLTxzkerL/h+6/ADwBvAb4O7Dpjnz3AH0671g76uhx4BLiov3zxtOtu29MZ+9
/M+pv5U699DK/VbcAv9p/vAo5Ou+4x9PSnwI395z8KfHHadTfo60eA9wFHNth+DfBXQABXAl8b5/ieoQ9W9RYHTfr6eeBzmfkSQGY+P+EahzXsa3UDcNdEKmunSV8JfE//+QWc5ZqQTaZJT7uAA/3nD51l+6aTmQeBF8+xy3XAn+S6rwIXRsSOcY1voA/W9BYHH+v/CfWViLj0LNs3myZ9vRN4Z0T8Y0R8NSKunlh1o2l8O4qIuAx4B/B3E6irrSZ93QL8dEQcY/2TZzezuTXp6evAx/rPPwq8NSK+dwK1danTW6YY6IPFWdad+dGgvwTmMvMHgb8F7uy8qvaa9LWN9WmXHutns38cERd2XFcbTXp63fXAVzLztQ7rGZcmfd0A3JGZl7D+Z/0XI2Iz/3436elXgQ9ExCPAB4DjwKmuC+vYMD+jQ9vML/hmMfAWB5n57cz8r/7i54HdE6qtjSa3bjgG3J2Z/5OZ/8b6fXgun1B9o2h0O4q+69ka0y3QrK+bgC8DZObDwHezfu+QzarJ79WzmfkTmfle4Df7605OrsRODPMzOjQDfbCBtzg4Yw7sw8ATE6xvVE1u3fAXwCJARGxnfQrmmYlWOZxGt6OIiCuAi4CHJ1zfqJr09U3gKoCIeBfrgf6tiVY5nCa/V9tP+yvj14HbJ1xjF+4Bfqb/aZcrgZOZeWJcB291pej/B7nBLQ4i4reA1cy8B/iliPgw638Ovsj6p142tYZ9PQB8KCK+AbwG/Fpmfnt6VZ9bw55gfXpiOfsfO9jsGva1F/h8RPwK63/C79nM/TXsqQf8bkQkcBD41NQKbigi7mK97u399zM+A7wZIDP/iPX3N64Bngb+E/jZsY6/iV9zSdIQnHKRpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkq4n8BUTSqpFRBAZ8AAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%matplotlib inline\n",
"geneDf.rocauc.hist()"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
" Gene | \n",
"
\n",
" \n",
" \n",
" \n",
" 3 | \n",
" 0.991546 | \n",
" 457 | \n",
" 38.312977 | \n",
" 0.507767 | \n",
" (14, 32092134) | \n",
" 14 | \n",
" 32092134 | \n",
" 14 | \n",
" ARHGAP5 | \n",
"
\n",
" \n",
" 7 | \n",
" 0.738417 | \n",
" 0 | \n",
" 0.001931 | \n",
" 0.500000 | \n",
" (8, 142877758) | \n",
" 8 | \n",
" 142877758 | \n",
" 11 | \n",
" CYP11B1 | \n",
"
\n",
" \n",
" 8 | \n",
" 0.989537 | \n",
" 91 | \n",
" 5.262948 | \n",
" 0.503589 | \n",
" (X, 24789042) | \n",
" X | \n",
" 24789042 | \n",
" 11 | \n",
" POLA1 | \n",
"
\n",
" \n",
" 11 | \n",
" 0.684925 | \n",
" 286 | \n",
" 16.242366 | \n",
" 0.613307 | \n",
" (6, 29944050) | \n",
" 6 | \n",
" 29944050 | \n",
" 9 | \n",
" HLA | \n",
"
\n",
" \n",
" 13 | \n",
" 0.998226 | \n",
" 524 | \n",
" 1291.374046 | \n",
" 0.694932 | \n",
" (X, 24788994) | \n",
" X | \n",
" 24788994 | \n",
" 9 | \n",
" POLA1 | \n",
"
\n",
" \n",
" 14 | \n",
" 0.994127 | \n",
" 431 | \n",
" 36.320537 | \n",
" 0.621622 | \n",
" (12, 6018369) | \n",
" 12 | \n",
" 6018369 | \n",
" 9 | \n",
" VWF | \n",
"
\n",
" \n",
" 18 | \n",
" 0.924295 | \n",
" 502 | \n",
" 79.051527 | \n",
" 0.615436 | \n",
" (12, 6018901) | \n",
" 12 | \n",
" 6018901 | \n",
" 7 | \n",
" VWF | \n",
"
\n",
" \n",
" 19 | \n",
" 0.967688 | \n",
" 524 | \n",
" 950.933206 | \n",
" 0.671795 | \n",
" (6, 31271836) | \n",
" 6 | \n",
" 31271836 | \n",
" 7 | \n",
" HLA | \n",
"
\n",
" \n",
" 20 | \n",
" 0.524973 | \n",
" 502 | \n",
" 1370.792233 | \n",
" 0.456735 | \n",
" (6, 31356729) | \n",
" 6 | \n",
" 31356729 | \n",
" 7 | \n",
" MIR6891 | \n",
"
\n",
" \n",
" 21 | \n",
" 0.966475 | \n",
" 1 | \n",
" 0.392720 | \n",
" 0.500000 | \n",
" (7, 117548682) | \n",
" 7 | \n",
" 117548682 | \n",
" 7 | \n",
" CFTR | \n",
"
\n",
" \n",
" 22 | \n",
" 0.843596 | \n",
" 522 | \n",
" 2902.068702 | \n",
" 0.470996 | \n",
" (6, 29943406) | \n",
" 6 | \n",
" 29943406 | \n",
" 7 | \n",
" HLA | \n",
"
\n",
" \n",
" 23 | \n",
" 0.624032 | \n",
" 480 | \n",
" 2319.403475 | \n",
" 0.517632 | \n",
" (6, 29943422) | \n",
" 6 | \n",
" 29943422 | \n",
" 7 | \n",
" HLA | \n",
"
\n",
" \n",
" 25 | \n",
" 0.976915 | \n",
" 3 | \n",
" 0.319915 | \n",
" 0.498495 | \n",
" (7, 142750675) | \n",
" 7 | \n",
" 142750675 | \n",
" 7 | \n",
" PRSS1 | \n",
"
\n",
" \n",
" 26 | \n",
" 0.881503 | \n",
" 212 | \n",
" 9.790076 | \n",
" 0.500000 | \n",
" (7, 152238825) | \n",
" 7 | \n",
" 152238825 | \n",
" 6 | \n",
" KMT2C | \n",
"
\n",
" \n",
" 28 | \n",
" 0.501689 | \n",
" 472 | \n",
" 84.025794 | \n",
" 0.500000 | \n",
" (9, 128257486) | \n",
" 9 | \n",
" 128257486 | \n",
" 6 | \n",
" GOLGA2 | \n",
"
\n",
" \n",
" 29 | \n",
" 0.962142 | \n",
" 4 | \n",
" 0.215953 | \n",
" 0.501053 | \n",
" (7, 142750600) | \n",
" 7 | \n",
" 142750600 | \n",
" 6 | \n",
" PRSS1 | \n",
"
\n",
" \n",
" 30 | \n",
" 0.116652 | \n",
" 481 | \n",
" 300.219466 | \n",
" 0.601301 | \n",
" (7, 55165350) | \n",
" 7 | \n",
" 55165350 | \n",
" 6 | \n",
" EGFR | \n",
"
\n",
" \n",
" 32 | \n",
" 0.961239 | \n",
" 3 | \n",
" 0.492366 | \n",
" 0.479700 | \n",
" (4, 144120554) | \n",
" 4 | \n",
" 144120554 | \n",
" 6 | \n",
" GYPA | \n",
"
\n",
" \n",
" 33 | \n",
" 0.867048 | \n",
" 315 | \n",
" 18.980583 | \n",
" 0.640445 | \n",
" (6, 29944118) | \n",
" 6 | \n",
" 29944118 | \n",
" 6 | \n",
" HLA | \n",
"
\n",
" \n",
" 36 | \n",
" 0.991734 | \n",
" 9 | \n",
" 1.315488 | \n",
" 0.507143 | \n",
" (19, 14766987) | \n",
" 19 | \n",
" 14766987 | \n",
" 5 | \n",
" ADGRE2 | \n",
"
\n",
" \n",
" 38 | \n",
" 0.956522 | \n",
" 0 | \n",
" 0.064516 | \n",
" 0.500000 | \n",
" (7, 127611678) | \n",
" 7 | \n",
" 127611678 | \n",
" 5 | \n",
" PAX4 | \n",
"
\n",
" \n",
" 39 | \n",
" 0.981141 | \n",
" 0 | \n",
" 0.013699 | \n",
" 0.504950 | \n",
" (1, 235775088) | \n",
" 1 | \n",
" 235775088 | \n",
" 5 | \n",
" LYST | \n",
"
\n",
" \n",
" 40 | \n",
" 0.922483 | \n",
" 1 | \n",
" 2.292089 | \n",
" 0.501445 | \n",
" (1, 173828313) | \n",
" 1 | \n",
" 173828313 | \n",
" 5 | \n",
" DARS2 | \n",
"
\n",
" \n",
" 41 | \n",
" 0.991328 | \n",
" 15 | \n",
" 1.331707 | \n",
" 0.523174 | \n",
" (2, 166281810) | \n",
" 2 | \n",
" 166281810 | \n",
" 5 | \n",
" LOC101929680 | \n",
"
\n",
" \n",
" 42 | \n",
" 0.944444 | \n",
" 8 | \n",
" 2.370213 | \n",
" 0.500000 | \n",
" (6, 32664778) | \n",
" 6 | \n",
" 32664778 | \n",
" 5 | \n",
" HLA | \n",
"
\n",
" \n",
" 43 | \n",
" 0.072989 | \n",
" 401 | \n",
" 747.178357 | \n",
" 0.569311 | \n",
" (6, 31270232) | \n",
" 6 | \n",
" 31270232 | \n",
" 5 | \n",
" HLA | \n",
"
\n",
" \n",
" 44 | \n",
" 0.890402 | \n",
" 71 | \n",
" 7.585603 | \n",
" 0.508440 | \n",
" (12, 52897420) | \n",
" 12 | \n",
" 52897420 | \n",
" 5 | \n",
" KRT8 | \n",
"
\n",
" \n",
" 46 | \n",
" 0.468347 | \n",
" 517 | \n",
" 2905.948375 | \n",
" 0.499899 | \n",
" (6, 29942845) | \n",
" 6 | \n",
" 29942845 | \n",
" 5 | \n",
" HLA | \n",
"
\n",
" \n",
" 50 | \n",
" 0.561464 | \n",
" 500 | \n",
" 73.971374 | \n",
" 0.545052 | \n",
" (12, 6018910) | \n",
" 12 | \n",
" 6018910 | \n",
" 5 | \n",
" VWF | \n",
"
\n",
" \n",
" 51 | \n",
" 0.996116 | \n",
" 524 | \n",
" 189.522901 | \n",
" 0.500495 | \n",
" (5, 236441) | \n",
" 5 | \n",
" 236441 | \n",
" 4 | \n",
" SDHA | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 59 | \n",
" 0.775089 | \n",
" 315 | \n",
" 17.183206 | \n",
" 0.672237 | \n",
" (6, 29944059) | \n",
" 6 | \n",
" 29944059 | \n",
" 4 | \n",
" HLA | \n",
"
\n",
" \n",
" 63 | \n",
" 0.556916 | \n",
" 283 | \n",
" 553.902748 | \n",
" 0.533268 | \n",
" (6, 31270214) | \n",
" 6 | \n",
" 31270214 | \n",
" 4 | \n",
" HLA | \n",
"
\n",
" \n",
" 64 | \n",
" 0.025849 | \n",
" 454 | \n",
" 1296.377395 | \n",
" 0.552955 | \n",
" (6, 29944124) | \n",
" 6 | \n",
" 29944124 | \n",
" 4 | \n",
" HLA | \n",
"
\n",
" \n",
" 66 | \n",
" 0.028601 | \n",
" 484 | \n",
" 1758.949290 | \n",
" 0.509784 | \n",
" (6, 31356377) | \n",
" 6 | \n",
" 31356377 | \n",
" 4 | \n",
" MIR6891 | \n",
"
\n",
" \n",
" 68 | \n",
" 0.380695 | \n",
" 512 | \n",
" 2479.959924 | \n",
" 0.497330 | \n",
" (6, 29944132) | \n",
" 6 | \n",
" 29944132 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 69 | \n",
" 0.069974 | \n",
" 516 | \n",
" 2769.652672 | \n",
" 0.599747 | \n",
" (6, 29944135) | \n",
" 6 | \n",
" 29944135 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 70 | \n",
" 0.002137 | \n",
" 16 | \n",
" 2.042345 | \n",
" 0.495708 | \n",
" (1, 237591774) | \n",
" 1 | \n",
" 237591774 | \n",
" 3 | \n",
" RYR2 | \n",
"
\n",
" \n",
" 71 | \n",
" 0.808434 | \n",
" 524 | \n",
" 3416.650763 | \n",
" 0.442027 | \n",
" (6, 29944376) | \n",
" 6 | \n",
" 29944376 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 72 | \n",
" 0.232668 | \n",
" 524 | \n",
" 3618.646947 | \n",
" 0.603043 | \n",
" (6, 29944151) | \n",
" 6 | \n",
" 29944151 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 73 | \n",
" 0.741291 | \n",
" 506 | \n",
" 315.198473 | \n",
" 0.621487 | \n",
" (6, 32664883) | \n",
" 6 | \n",
" 32664883 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 75 | \n",
" 0.863203 | \n",
" 395 | \n",
" 31.936902 | \n",
" 0.488386 | \n",
" (10, 4999206) | \n",
" 10 | \n",
" 4999206 | \n",
" 3 | \n",
" AKR1C2 | \n",
"
\n",
" \n",
" 77 | \n",
" 0.380747 | \n",
" 480 | \n",
" 191.682692 | \n",
" 0.596527 | \n",
" (6, 32661384) | \n",
" 6 | \n",
" 32661384 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 78 | \n",
" 0.778990 | \n",
" 27 | \n",
" 2.581081 | \n",
" 0.645061 | \n",
" (6, 32661333) | \n",
" 6 | \n",
" 32661333 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 79 | \n",
" 0.007407 | \n",
" 0 | \n",
" 1.307692 | \n",
" 0.481203 | \n",
" (12, 57099758) | \n",
" 12 | \n",
" 57099758 | \n",
" 3 | \n",
" STAT6 | \n",
"
\n",
" \n",
" 81 | \n",
" 0.993934 | \n",
" 523 | \n",
" 1127.133588 | \n",
" 0.582806 | \n",
" (16, 2106849) | \n",
" 16 | \n",
" 2106849 | \n",
" 3 | \n",
" MIR6511B1 | \n",
"
\n",
" \n",
" 84 | \n",
" 0.061988 | \n",
" 524 | \n",
" 2200.085878 | \n",
" 0.573877 | \n",
" (6, 29942916) | \n",
" 6 | \n",
" 29942916 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 85 | \n",
" 0.756602 | \n",
" 29 | \n",
" 2.614504 | \n",
" 0.669605 | \n",
" (3, 75630855) | \n",
" 3 | \n",
" 75630855 | \n",
" 3 | \n",
" LOC107986102 | \n",
"
\n",
" \n",
" 86 | \n",
" 0.224100 | \n",
" 13 | \n",
" 1.728489 | \n",
" 0.588009 | \n",
" (3, 75630794) | \n",
" 3 | \n",
" 75630794 | \n",
" 3 | \n",
" LOC107986102 | \n",
"
\n",
" \n",
" 87 | \n",
" 0.549532 | \n",
" 103 | \n",
" 7.149510 | \n",
" 0.523889 | \n",
" (3, 49686483) | \n",
" 3 | \n",
" 49686483 | \n",
" 3 | \n",
" MST1 | \n",
"
\n",
" \n",
" 88 | \n",
" 0.683612 | \n",
" 519 | \n",
" 1443.973282 | \n",
" 0.499179 | \n",
" (6, 29943463) | \n",
" 6 | \n",
" 29943463 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 89 | \n",
" 0.758810 | \n",
" 35 | \n",
" 3.059961 | \n",
" 0.520303 | \n",
" (6, 29943667) | \n",
" 6 | \n",
" 29943667 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 93 | \n",
" 0.493328 | \n",
" 512 | \n",
" 82.120229 | \n",
" 0.692187 | \n",
" (17, 7673704) | \n",
" 17 | \n",
" 7673704 | \n",
" 3 | \n",
" TP53 | \n",
"
\n",
" \n",
" 94 | \n",
" 0.403970 | \n",
" 55 | \n",
" 3.786275 | \n",
" 0.490953 | \n",
" (17, 21416556) | \n",
" 17 | \n",
" 21416556 | \n",
" 3 | \n",
" KCNJ12 | \n",
"
\n",
" \n",
" 95 | \n",
" 0.666459 | \n",
" 523 | \n",
" 986.114504 | \n",
" 0.500538 | \n",
" (8, 100709671) | \n",
" 8 | \n",
" 100709671 | \n",
" 3 | \n",
" PABPC1 | \n",
"
\n",
" \n",
" 96 | \n",
" 0.951125 | \n",
" 0 | \n",
" 0.047244 | \n",
" 0.509217 | \n",
" (3, 183959847) | \n",
" 3 | \n",
" 183959847 | \n",
" 3 | \n",
" ABCC5 | \n",
"
\n",
" \n",
" 99 | \n",
" 0.870781 | \n",
" 495 | \n",
" 23.572519 | \n",
" 0.462571 | \n",
" (22, 24627926) | \n",
" 22 | \n",
" 24627926 | \n",
" 3 | \n",
" GGT1 | \n",
"
\n",
" \n",
" 100 | \n",
" 0.754902 | \n",
" 0 | \n",
" 0.001957 | \n",
" 0.500000 | \n",
" (20, 8788776) | \n",
" 20 | \n",
" 8788776 | \n",
" 3 | \n",
" PLCB1 | \n",
"
\n",
" \n",
" 101 | \n",
" 0.289047 | \n",
" 205 | \n",
" 13.273622 | \n",
" 0.551120 | \n",
" (6, 31271875) | \n",
" 6 | \n",
" 31271875 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 102 | \n",
" 0.318593 | \n",
" 524 | \n",
" 977.551527 | \n",
" 0.504434 | \n",
" (6, 31271839) | \n",
" 6 | \n",
" 31271839 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 103 | \n",
" 0.953258 | \n",
" 522 | \n",
" 2469.611111 | \n",
" 0.582500 | \n",
" (6, 31356399) | \n",
" 6 | \n",
" 31356399 | \n",
" 3 | \n",
" MIR6891 | \n",
"
\n",
" \n",
"
\n",
"
67 rows × 9 columns
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"3 0.991546 457 38.312977 0.507767 (14, 32092134) 14 32092134 \n",
"7 0.738417 0 0.001931 0.500000 (8, 142877758) 8 142877758 \n",
"8 0.989537 91 5.262948 0.503589 (X, 24789042) X 24789042 \n",
"11 0.684925 286 16.242366 0.613307 (6, 29944050) 6 29944050 \n",
"13 0.998226 524 1291.374046 0.694932 (X, 24788994) X 24788994 \n",
"14 0.994127 431 36.320537 0.621622 (12, 6018369) 12 6018369 \n",
"18 0.924295 502 79.051527 0.615436 (12, 6018901) 12 6018901 \n",
"19 0.967688 524 950.933206 0.671795 (6, 31271836) 6 31271836 \n",
"20 0.524973 502 1370.792233 0.456735 (6, 31356729) 6 31356729 \n",
"21 0.966475 1 0.392720 0.500000 (7, 117548682) 7 117548682 \n",
"22 0.843596 522 2902.068702 0.470996 (6, 29943406) 6 29943406 \n",
"23 0.624032 480 2319.403475 0.517632 (6, 29943422) 6 29943422 \n",
"25 0.976915 3 0.319915 0.498495 (7, 142750675) 7 142750675 \n",
"26 0.881503 212 9.790076 0.500000 (7, 152238825) 7 152238825 \n",
"28 0.501689 472 84.025794 0.500000 (9, 128257486) 9 128257486 \n",
"29 0.962142 4 0.215953 0.501053 (7, 142750600) 7 142750600 \n",
"30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n",
"32 0.961239 3 0.492366 0.479700 (4, 144120554) 4 144120554 \n",
"33 0.867048 315 18.980583 0.640445 (6, 29944118) 6 29944118 \n",
"36 0.991734 9 1.315488 0.507143 (19, 14766987) 19 14766987 \n",
"38 0.956522 0 0.064516 0.500000 (7, 127611678) 7 127611678 \n",
"39 0.981141 0 0.013699 0.504950 (1, 235775088) 1 235775088 \n",
"40 0.922483 1 2.292089 0.501445 (1, 173828313) 1 173828313 \n",
"41 0.991328 15 1.331707 0.523174 (2, 166281810) 2 166281810 \n",
"42 0.944444 8 2.370213 0.500000 (6, 32664778) 6 32664778 \n",
"43 0.072989 401 747.178357 0.569311 (6, 31270232) 6 31270232 \n",
"44 0.890402 71 7.585603 0.508440 (12, 52897420) 12 52897420 \n",
"46 0.468347 517 2905.948375 0.499899 (6, 29942845) 6 29942845 \n",
"50 0.561464 500 73.971374 0.545052 (12, 6018910) 12 6018910 \n",
"51 0.996116 524 189.522901 0.500495 (5, 236441) 5 236441 \n",
".. ... ... ... ... ... .. ... \n",
"59 0.775089 315 17.183206 0.672237 (6, 29944059) 6 29944059 \n",
"63 0.556916 283 553.902748 0.533268 (6, 31270214) 6 31270214 \n",
"64 0.025849 454 1296.377395 0.552955 (6, 29944124) 6 29944124 \n",
"66 0.028601 484 1758.949290 0.509784 (6, 31356377) 6 31356377 \n",
"68 0.380695 512 2479.959924 0.497330 (6, 29944132) 6 29944132 \n",
"69 0.069974 516 2769.652672 0.599747 (6, 29944135) 6 29944135 \n",
"70 0.002137 16 2.042345 0.495708 (1, 237591774) 1 237591774 \n",
"71 0.808434 524 3416.650763 0.442027 (6, 29944376) 6 29944376 \n",
"72 0.232668 524 3618.646947 0.603043 (6, 29944151) 6 29944151 \n",
"73 0.741291 506 315.198473 0.621487 (6, 32664883) 6 32664883 \n",
"75 0.863203 395 31.936902 0.488386 (10, 4999206) 10 4999206 \n",
"77 0.380747 480 191.682692 0.596527 (6, 32661384) 6 32661384 \n",
"78 0.778990 27 2.581081 0.645061 (6, 32661333) 6 32661333 \n",
"79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n",
"81 0.993934 523 1127.133588 0.582806 (16, 2106849) 16 2106849 \n",
"84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n",
"85 0.756602 29 2.614504 0.669605 (3, 75630855) 3 75630855 \n",
"86 0.224100 13 1.728489 0.588009 (3, 75630794) 3 75630794 \n",
"87 0.549532 103 7.149510 0.523889 (3, 49686483) 3 49686483 \n",
"88 0.683612 519 1443.973282 0.499179 (6, 29943463) 6 29943463 \n",
"89 0.758810 35 3.059961 0.520303 (6, 29943667) 6 29943667 \n",
"93 0.493328 512 82.120229 0.692187 (17, 7673704) 17 7673704 \n",
"94 0.403970 55 3.786275 0.490953 (17, 21416556) 17 21416556 \n",
"95 0.666459 523 986.114504 0.500538 (8, 100709671) 8 100709671 \n",
"96 0.951125 0 0.047244 0.509217 (3, 183959847) 3 183959847 \n",
"99 0.870781 495 23.572519 0.462571 (22, 24627926) 22 24627926 \n",
"100 0.754902 0 0.001957 0.500000 (20, 8788776) 20 8788776 \n",
"101 0.289047 205 13.273622 0.551120 (6, 31271875) 6 31271875 \n",
"102 0.318593 524 977.551527 0.504434 (6, 31271839) 6 31271839 \n",
"103 0.953258 522 2469.611111 0.582500 (6, 31356399) 6 31356399 \n",
"\n",
" tcga_wxs_count Gene \n",
"3 14 ARHGAP5 \n",
"7 11 CYP11B1 \n",
"8 11 POLA1 \n",
"11 9 HLA \n",
"13 9 POLA1 \n",
"14 9 VWF \n",
"18 7 VWF \n",
"19 7 HLA \n",
"20 7 MIR6891 \n",
"21 7 CFTR \n",
"22 7 HLA \n",
"23 7 HLA \n",
"25 7 PRSS1 \n",
"26 6 KMT2C \n",
"28 6 GOLGA2 \n",
"29 6 PRSS1 \n",
"30 6 EGFR \n",
"32 6 GYPA \n",
"33 6 HLA \n",
"36 5 ADGRE2 \n",
"38 5 PAX4 \n",
"39 5 LYST \n",
"40 5 DARS2 \n",
"41 5 LOC101929680 \n",
"42 5 HLA \n",
"43 5 HLA \n",
"44 5 KRT8 \n",
"46 5 HLA \n",
"50 5 VWF \n",
"51 4 SDHA \n",
".. ... ... \n",
"59 4 HLA \n",
"63 4 HLA \n",
"64 4 HLA \n",
"66 4 MIR6891 \n",
"68 3 HLA \n",
"69 3 HLA \n",
"70 3 RYR2 \n",
"71 3 HLA \n",
"72 3 HLA \n",
"73 3 HLA \n",
"75 3 AKR1C2 \n",
"77 3 HLA \n",
"78 3 HLA \n",
"79 3 STAT6 \n",
"81 3 MIR6511B1 \n",
"84 3 HLA \n",
"85 3 LOC107986102 \n",
"86 3 LOC107986102 \n",
"87 3 MST1 \n",
"88 3 HLA \n",
"89 3 HLA \n",
"93 3 TP53 \n",
"94 3 KCNJ12 \n",
"95 3 PABPC1 \n",
"96 3 ABCC5 \n",
"99 3 GGT1 \n",
"100 3 PLCB1 \n",
"101 3 HLA \n",
"102 3 HLA \n",
"103 3 MIR6891 \n",
"\n",
"[67 rows x 9 columns]"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top100GeneDf[top100GeneDf.rocauc<0.7]"
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" auprc | \n",
" rnaseq_n | \n",
" rnaseq_rd | \n",
" rocauc | \n",
" vcfIndex | \n",
" Chr | \n",
" Pos | \n",
" tcga_wxs_count | \n",
" Gene | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.998211 | \n",
" 519 | \n",
" 133.822519 | \n",
" 0.984290 | \n",
" (2, 208248388) | \n",
" 2 | \n",
" 208248388 | \n",
" 371 | \n",
" IDH1 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.543616 | \n",
" 520 | \n",
" 122.076336 | \n",
" 0.945266 | \n",
" (17, 7673803) | \n",
" 17 | \n",
" 7673803 | \n",
" 59 | \n",
" TP53 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.865149 | \n",
" 519 | \n",
" 133.375954 | \n",
" 0.896410 | \n",
" (2, 208248389) | \n",
" 2 | \n",
" 208248389 | \n",
" 38 | \n",
" IDH1 | \n",
"
\n",
" \n",
" 5 | \n",
" 1.000000 | \n",
" 524 | \n",
" 353.841603 | \n",
" 1.000000 | \n",
" (15, 90088606) | \n",
" 15 | \n",
" 90088606 | \n",
" 15 | \n",
" IDH2 | \n",
"
\n",
" \n",
" 6 | \n",
" 0.954091 | \n",
" 520 | \n",
" 121.372849 | \n",
" 0.948889 | \n",
" (17, 7673802) | \n",
" 17 | \n",
" 7673802 | \n",
" 15 | \n",
" TP53 | \n",
"
\n",
" \n",
" 9 | \n",
" 0.999940 | \n",
" 513 | \n",
" 210.776718 | \n",
" 0.996541 | \n",
" (1, 109690516) | \n",
" 1 | \n",
" 109690516 | \n",
" 10 | \n",
" GSTM1 | \n",
"
\n",
" \n",
" 10 | \n",
" 0.975154 | \n",
" 496 | \n",
" 59.135496 | \n",
" 0.999370 | \n",
" (17, 7674872) | \n",
" 17 | \n",
" 7674872 | \n",
" 10 | \n",
" TP53 | \n",
"
\n",
" \n",
" 15 | \n",
" 0.895074 | \n",
" 520 | \n",
" 124.636711 | \n",
" 0.929366 | \n",
" (17, 7674220) | \n",
" 17 | \n",
" 7674220 | \n",
" 8 | \n",
" TP53 | \n",
"
\n",
" \n",
" 16 | \n",
" 0.850000 | \n",
" 510 | \n",
" 91.690840 | \n",
" 0.998394 | \n",
" (17, 7675076) | \n",
" 17 | \n",
" 7675076 | \n",
" 8 | \n",
" TP53 | \n",
"
\n",
" \n",
" 17 | \n",
" 0.779858 | \n",
" 509 | \n",
" 107.205374 | \n",
" 0.765568 | \n",
" (17, 7675088) | \n",
" 17 | \n",
" 7675088 | \n",
" 8 | \n",
" TP53 | \n",
"
\n",
" \n",
" 24 | \n",
" 1.000000 | \n",
" 520 | \n",
" 123.956023 | \n",
" 1.000000 | \n",
" (17, 7674221) | \n",
" 17 | \n",
" 7674221 | \n",
" 7 | \n",
" TP53 | \n",
"
\n",
" \n",
" 27 | \n",
" 0.777019 | \n",
" 518 | \n",
" 121.843511 | \n",
" 0.875674 | \n",
" (17, 7673776) | \n",
" 17 | \n",
" 7673776 | \n",
" 6 | \n",
" TP53 | \n",
"
\n",
" \n",
" 31 | \n",
" 0.999499 | \n",
" 8 | \n",
" 2.092742 | \n",
" 0.781609 | \n",
" (12, 2685853) | \n",
" 12 | \n",
" 2685853 | \n",
" 6 | \n",
" CACNA1C | \n",
"
\n",
" \n",
" 34 | \n",
" 0.831944 | \n",
" 519 | \n",
" 120.805344 | \n",
" 0.871324 | \n",
" (17, 7674230) | \n",
" 17 | \n",
" 7674230 | \n",
" 5 | \n",
" TP53 | \n",
"
\n",
" \n",
" 35 | \n",
" 0.795725 | \n",
" 513 | \n",
" 99.967557 | \n",
" 0.869141 | \n",
" (17, 7674945) | \n",
" 17 | \n",
" 7674945 | \n",
" 5 | \n",
" TP53 | \n",
"
\n",
" \n",
" 37 | \n",
" 0.718190 | \n",
" 464 | \n",
" 182.276718 | \n",
" 0.845446 | \n",
" (7, 55154129) | \n",
" 7 | \n",
" 55154129 | \n",
" 5 | \n",
" EGFR | \n",
"
\n",
" \n",
" 45 | \n",
" 0.559769 | \n",
" 524 | \n",
" 2793.311069 | \n",
" 0.797976 | \n",
" (6, 29942825) | \n",
" 6 | \n",
" 29942825 | \n",
" 5 | \n",
" HLA | \n",
"
\n",
" \n",
" 47 | \n",
" 0.922913 | \n",
" 368 | \n",
" 21.582061 | \n",
" 0.798079 | \n",
" (6, 29944102) | \n",
" 6 | \n",
" 29944102 | \n",
" 5 | \n",
" HLA | \n",
"
\n",
" \n",
" 48 | \n",
" 0.802333 | \n",
" 371 | \n",
" 21.608779 | \n",
" 0.777366 | \n",
" (6, 29944103) | \n",
" 6 | \n",
" 29944103 | \n",
" 5 | \n",
" HLA | \n",
"
\n",
" \n",
" 49 | \n",
" 0.942262 | \n",
" 524 | \n",
" 4361.543893 | \n",
" 0.960710 | \n",
" (6, 29944168) | \n",
" 6 | \n",
" 29944168 | \n",
" 5 | \n",
" HLA | \n",
"
\n",
" \n",
" 60 | \n",
" 0.984222 | \n",
" 315 | \n",
" 16.535373 | \n",
" 0.803603 | \n",
" (6, 29944067) | \n",
" 6 | \n",
" 29944067 | \n",
" 4 | \n",
" HLA | \n",
"
\n",
" \n",
" 62 | \n",
" 0.916660 | \n",
" 399 | \n",
" 759.471660 | \n",
" 0.916426 | \n",
" (6, 31270233) | \n",
" 6 | \n",
" 31270233 | \n",
" 4 | \n",
" HLA | \n",
"
\n",
" \n",
" 65 | \n",
" 0.916102 | \n",
" 524 | \n",
" 3459.543893 | \n",
" 0.740310 | \n",
" (6, 29944144) | \n",
" 6 | \n",
" 29944144 | \n",
" 4 | \n",
" HLA | \n",
"
\n",
" \n",
" 67 | \n",
" 1.000000 | \n",
" 516 | \n",
" 115.853053 | \n",
" 1.000000 | \n",
" (17, 7676044) | \n",
" 17 | \n",
" 7676044 | \n",
" 4 | \n",
" TP53 | \n",
"
\n",
" \n",
" 74 | \n",
" 0.026316 | \n",
" 436 | \n",
" 162.306796 | \n",
" 0.784810 | \n",
" (6, 32664926) | \n",
" 6 | \n",
" 32664926 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 76 | \n",
" 0.815908 | \n",
" 493 | \n",
" 215.211832 | \n",
" 0.928361 | \n",
" (6, 32661393) | \n",
" 6 | \n",
" 32661393 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 82 | \n",
" 0.995522 | \n",
" 517 | \n",
" 1391.395753 | \n",
" 0.989221 | \n",
" (6, 29942795) | \n",
" 6 | \n",
" 29942795 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 83 | \n",
" 0.965023 | \n",
" 510 | \n",
" 2437.399610 | \n",
" 0.856497 | \n",
" (6, 29942858) | \n",
" 6 | \n",
" 29942858 | \n",
" 3 | \n",
" HLA | \n",
"
\n",
" \n",
" 90 | \n",
" 0.858112 | \n",
" 523 | \n",
" 66.984733 | \n",
" 0.899413 | \n",
" (3, 179234284) | \n",
" 3 | \n",
" 179234284 | \n",
" 3 | \n",
" PIK3CA | \n",
"
\n",
" \n",
" 91 | \n",
" 0.844742 | \n",
" 513 | \n",
" 88.143130 | \n",
" 0.874750 | \n",
" (17, 7674888) | \n",
" 17 | \n",
" 7674888 | \n",
" 3 | \n",
" TP53 | \n",
"
\n",
" \n",
" 92 | \n",
" 1.000000 | \n",
" 517 | \n",
" 117.526718 | \n",
" 1.000000 | \n",
" (17, 7674256) | \n",
" 17 | \n",
" 7674256 | \n",
" 3 | \n",
" TP53 | \n",
"
\n",
" \n",
" 97 | \n",
" 0.671406 | \n",
" 501 | \n",
" 94.854127 | \n",
" 0.794175 | \n",
" (17, 31350209) | \n",
" 17 | \n",
" 31350209 | \n",
" 3 | \n",
" NF1 | \n",
"
\n",
" \n",
" 98 | \n",
" 0.530668 | \n",
" 518 | \n",
" 380.994275 | \n",
" 0.738304 | \n",
" (5, 68295269) | \n",
" 5 | \n",
" 68295269 | \n",
" 3 | \n",
" PIK3R1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n",
"0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n",
"1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n",
"2 0.865149 519 133.375954 0.896410 (2, 208248389) 2 208248389 \n",
"5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n",
"6 0.954091 520 121.372849 0.948889 (17, 7673802) 17 7673802 \n",
"9 0.999940 513 210.776718 0.996541 (1, 109690516) 1 109690516 \n",
"10 0.975154 496 59.135496 0.999370 (17, 7674872) 17 7674872 \n",
"15 0.895074 520 124.636711 0.929366 (17, 7674220) 17 7674220 \n",
"16 0.850000 510 91.690840 0.998394 (17, 7675076) 17 7675076 \n",
"17 0.779858 509 107.205374 0.765568 (17, 7675088) 17 7675088 \n",
"24 1.000000 520 123.956023 1.000000 (17, 7674221) 17 7674221 \n",
"27 0.777019 518 121.843511 0.875674 (17, 7673776) 17 7673776 \n",
"31 0.999499 8 2.092742 0.781609 (12, 2685853) 12 2685853 \n",
"34 0.831944 519 120.805344 0.871324 (17, 7674230) 17 7674230 \n",
"35 0.795725 513 99.967557 0.869141 (17, 7674945) 17 7674945 \n",
"37 0.718190 464 182.276718 0.845446 (7, 55154129) 7 55154129 \n",
"45 0.559769 524 2793.311069 0.797976 (6, 29942825) 6 29942825 \n",
"47 0.922913 368 21.582061 0.798079 (6, 29944102) 6 29944102 \n",
"48 0.802333 371 21.608779 0.777366 (6, 29944103) 6 29944103 \n",
"49 0.942262 524 4361.543893 0.960710 (6, 29944168) 6 29944168 \n",
"60 0.984222 315 16.535373 0.803603 (6, 29944067) 6 29944067 \n",
"62 0.916660 399 759.471660 0.916426 (6, 31270233) 6 31270233 \n",
"65 0.916102 524 3459.543893 0.740310 (6, 29944144) 6 29944144 \n",
"67 1.000000 516 115.853053 1.000000 (17, 7676044) 17 7676044 \n",
"74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n",
"76 0.815908 493 215.211832 0.928361 (6, 32661393) 6 32661393 \n",
"82 0.995522 517 1391.395753 0.989221 (6, 29942795) 6 29942795 \n",
"83 0.965023 510 2437.399610 0.856497 (6, 29942858) 6 29942858 \n",
"90 0.858112 523 66.984733 0.899413 (3, 179234284) 3 179234284 \n",
"91 0.844742 513 88.143130 0.874750 (17, 7674888) 17 7674888 \n",
"92 1.000000 517 117.526718 1.000000 (17, 7674256) 17 7674256 \n",
"97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n",
"98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n",
"\n",
" tcga_wxs_count Gene \n",
"0 371 IDH1 \n",
"1 59 TP53 \n",
"2 38 IDH1 \n",
"5 15 IDH2 \n",
"6 15 TP53 \n",
"9 10 GSTM1 \n",
"10 10 TP53 \n",
"15 8 TP53 \n",
"16 8 TP53 \n",
"17 8 TP53 \n",
"24 7 TP53 \n",
"27 6 TP53 \n",
"31 6 CACNA1C \n",
"34 5 TP53 \n",
"35 5 TP53 \n",
"37 5 EGFR \n",
"45 5 HLA \n",
"47 5 HLA \n",
"48 5 HLA \n",
"49 5 HLA \n",
"60 4 HLA \n",
"62 4 HLA \n",
"65 4 HLA \n",
"67 4 TP53 \n",
"74 3 HLA \n",
"76 3 HLA \n",
"82 3 HLA \n",
"83 3 HLA \n",
"90 3 PIK3CA \n",
"91 3 TP53 \n",
"92 3 TP53 \n",
"97 3 NF1 \n",
"98 3 PIK3R1 "
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top100GeneDf[top100GeneDf.rocauc>0.7]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}