{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/cellar/users/btsui/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning: Columns (0) have mixed types. Specify dtype option on import or set low_memory=False.\n", " interactivity=interactivity, compiler=compiler, result=result)\n" ] } ], "source": [ "# NOTE(review): absolute, user-specific path; consider a configurable data directory.\n", "inVcfDir='/data/cellardata/users/btsui/dbsnp/Homo_sapiens/All_20170710.f1_byte2_not_00.vcf.gz'\n", "# Load column 0 (chromosome) as text: the recorded DtypeWarning shows it holds mixed types,\n", "# and declaring the dtype up front avoids chunk-dependent type inference.\n", "vcfDf=pd.read_csv(inVcfDir,sep='\\t',header=None,dtype={0:str})\n", "# The two '' names are kept as in the original (presumably the VCF QUAL/FILTER fields; TODO confirm).\n", "vcfDf.columns=['Chr','Pos','RsId','RefBase','AltBase','','','Annot']\n", "# np.str was a deprecated alias of the builtin str and is removed in NumPy >= 1.24.\n", "vcfDf['Chr']=vcfDf['Chr'].astype(str)\n" ] }, { "cell_type": "code", "execution_count": 150, "metadata": {}, "outputs": [], "source": [ "# Keep the first 100 rows after de-duplicating on vcfIndex.\n", "# .copy() so the column assignments made in later cells act on an independent frame.\n", "top100GeneDf=pd.read_pickle('./top_lgg_somatic_sites.pickle').drop_duplicates('vcfIndex').head(n=100).copy()" ] }, { "cell_type": "code", "execution_count": 167, "metadata": {}, "outputs": [], "source": [ "#vcfDf[vcfDf.Pos==29944050]" ] }, { "cell_type": "code", "execution_count": 178, "metadata": {}, "outputs": [], "source": [ "#top100GeneDf" ] }, { "cell_type": "code", "execution_count": 169, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
auprcrnaseq_nrnaseq_rdrocaucvcfIndexChrPostcga_wxs_count
700.002137162.0423450.495708(1, 237591774)12375917743
790.00740701.3076920.481203(12, 57099758)12570997583
860.224100131.7284890.588009(3, 75630794)3756307943
\n", "
" ], "text/plain": [ " auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n", "70 0.002137 16 2.042345 0.495708 (1, 237591774) 1 237591774 \n", "79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n", "86 0.224100 13 1.728489 0.588009 (3, 75630794) 3 75630794 \n", "\n", " tcga_wxs_count \n", "70 3 \n", "79 3 \n", "86 3 " ] }, "execution_count": 169, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows with rnaseq_n below 20 and auprc below 0.5.\n", "top100GeneDf.loc[(top100GeneDf.rnaseq_n<20) & (top100GeneDf.auprc<0.5)]" ] }, { "cell_type": "code", "execution_count": 170, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
auprcrnaseq_nrnaseq_rdrocaucvcfIndexChrPostcga_wxs_count
300.116652481300.2194660.601301(7, 55165350)7551653506
430.072989401747.1783570.569311(6, 31270232)6312702325
560.03585635517.9483750.429899(22, 42127537)22421275374
640.0258494541296.3773950.552955(6, 29944124)6299441244
660.0286014841758.9492900.509784(6, 31356377)6313563774
690.0699745162769.6526720.599747(6, 29944135)6299441353
700.002137162.0423450.495708(1, 237591774)12375917743
740.026316436162.3067960.784810(6, 32664926)6326649263
790.00740701.3076920.481203(12, 57099758)12570997583
840.0619885242200.0858780.573877(6, 29942916)6299429163
\n", "
" ], "text/plain": [ " auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n", "30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n", "43 0.072989 401 747.178357 0.569311 (6, 31270232) 6 31270232 \n", "56 0.035856 355 17.948375 0.429899 (22, 42127537) 22 42127537 \n", "64 0.025849 454 1296.377395 0.552955 (6, 29944124) 6 29944124 \n", "66 0.028601 484 1758.949290 0.509784 (6, 31356377) 6 31356377 \n", "69 0.069974 516 2769.652672 0.599747 (6, 29944135) 6 29944135 \n", "70 0.002137 16 2.042345 0.495708 (1, 237591774) 1 237591774 \n", "74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n", "79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n", "84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n", "\n", " tcga_wxs_count \n", "30 6 \n", "43 5 \n", "56 4 \n", "64 4 \n", "66 4 \n", "69 3 \n", "70 3 \n", "74 3 \n", "79 3 \n", "84 3 " ] }, "execution_count": 170, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows whose auprc is below 0.2.\n", "top100GeneDf.loc[top100GeneDf.auprc<0.2]" ] }, { "cell_type": "code", "execution_count": 176, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
auprcrnaseq_nrnaseq_rdrocaucvcfIndexChrPostcga_wxs_count
430.072989401747.1783570.569311(6, 31270232)6312702325
560.03585635517.9483750.429899(22, 42127537)22421275374
640.0258494541296.3773950.552955(6, 29944124)6299441244
660.0286014841758.9492900.509784(6, 31356377)6313563774
690.0699745162769.6526720.599747(6, 29944135)6299441353
700.002137162.0423450.495708(1, 237591774)12375917743
740.026316436162.3067960.784810(6, 32664926)6326649263
790.00740701.3076920.481203(12, 57099758)12570997583
840.0619885242200.0858780.573877(6, 29942916)6299429163
\n", "
" ], "text/plain": [ " auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n", "43 0.072989 401 747.178357 0.569311 (6, 31270232) 6 31270232 \n", "56 0.035856 355 17.948375 0.429899 (22, 42127537) 22 42127537 \n", "64 0.025849 454 1296.377395 0.552955 (6, 29944124) 6 29944124 \n", "66 0.028601 484 1758.949290 0.509784 (6, 31356377) 6 31356377 \n", "69 0.069974 516 2769.652672 0.599747 (6, 29944135) 6 29944135 \n", "70 0.002137 16 2.042345 0.495708 (1, 237591774) 1 237591774 \n", "74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n", "79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n", "84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n", "\n", " tcga_wxs_count \n", "43 5 \n", "56 4 \n", "64 4 \n", "66 4 \n", "69 3 \n", "70 3 \n", "74 3 \n", "79 3 \n", "84 3 " ] }, "execution_count": 176, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows whose auprc is below 0.1.\n", "top100GeneDf.loc[top100GeneDf['auprc']<0.1]" ] }, { "cell_type": "code", "execution_count": 173, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.93" ] }, "execution_count": 173, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fraction of rows with rnaseq_n greater than zero.\n", "(top100GeneDf['rnaseq_n']>0).mean()" ] }, { "cell_type": "code", "execution_count": 171, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
auprcrnaseq_nrnaseq_rdrocaucvcfIndexChrPostcga_wxs_count
00.998211519133.8225190.984290(2, 208248388)2208248388371
10.543616520122.0763360.945266(17, 7673803)17767380359
20.865149519133.3759540.896410(2, 208248389)220824838938
51.000000524353.8416031.000000(15, 90088606)159008860615
60.954091520121.3728490.948889(17, 7673802)17767380215
30.99154645738.3129770.507767(14, 32092134)143209213414
70.73841700.0019310.500000(8, 142877758)814287775811
80.989537915.2629480.503589(X, 24789042)X2478904211
90.999940513210.7767180.996541(1, 109690516)110969051610
100.97515449659.1354960.999370(17, 7674872)17767487210
110.68492528616.2423660.613307(6, 29944050)6299440509
130.9982265241291.3740460.694932(X, 24788994)X247889949
140.99412743136.3205370.621622(12, 6018369)1260183699
170.779858509107.2053740.765568(17, 7675088)1776750888
150.895074520124.6367110.929366(17, 7674220)1776742208
160.85000051091.6908400.998394(17, 7675076)1776750768
180.92429550279.0515270.615436(12, 6018901)1260189017
190.967688524950.9332060.671795(6, 31271836)6312718367
200.5249735021370.7922330.456735(6, 31356729)6313567297
210.96647510.3927200.500000(7, 117548682)71175486827
220.8435965222902.0687020.470996(6, 29943406)6299434067
230.6240324802319.4034750.517632(6, 29943422)6299434227
241.000000520123.9560231.000000(17, 7674221)1776742217
250.97691530.3199150.498495(7, 142750675)71427506757
310.99949982.0927420.781609(12, 2685853)1226858536
330.86704831518.9805830.640445(6, 29944118)6299441186
320.96123930.4923660.479700(4, 144120554)41441205546
300.116652481300.2194660.601301(7, 55165350)7551653506
280.50168947284.0257940.500000(9, 128257486)91282574866
270.777019518121.8435110.875674(17, 7673776)1776737766
...........................
921.000000517117.5267181.000000(17, 7674256)1776742563
930.49332851282.1202290.692187(17, 7673704)1776737043
970.67140650194.8541270.794175(17, 31350209)17313502093
950.666459523986.1145040.500538(8, 100709671)81007096713
960.95112500.0472440.509217(3, 183959847)31839598473
860.224100131.7284890.588009(3, 75630794)3756307943
980.530668518380.9942750.738304(5, 68295269)5682952693
990.87078149523.5725190.462571(22, 24627926)22246279263
1000.75490200.0019570.500000(20, 8788776)2087887763
1010.28904720513.2736220.551120(6, 31271875)6312718753
1020.318593524977.5515270.504434(6, 31271839)6312718393
870.5495321037.1495100.523889(3, 49686483)3496864833
830.9650235102437.3996100.856497(6, 29942858)6299428583
850.756602292.6145040.669605(3, 75630855)3756308553
740.026316436162.3067960.784810(6, 32664926)6326649263
680.3806955122479.9599240.497330(6, 29944132)6299441323
690.0699745162769.6526720.599747(6, 29944135)6299441353
700.002137162.0423450.495708(1, 237591774)12375917743
710.8084345243416.6507630.442027(6, 29944376)6299443763
720.2326685243618.6469470.603043(6, 29944151)6299441513
730.741291506315.1984730.621487(6, 32664883)6326648833
750.86320339531.9369020.488386(10, 4999206)1049992063
840.0619885242200.0858780.573877(6, 29942916)6299429163
760.815908493215.2118320.928361(6, 32661393)6326613933
770.380747480191.6826920.596527(6, 32661384)6326613843
780.778990272.5810810.645061(6, 32661333)6326613333
790.00740701.3076920.481203(12, 57099758)12570997583
810.9939345231127.1335880.582806(16, 2106849)1621068493
820.9955225171391.3957530.989221(6, 29942795)6299427953
1030.9532585222469.6111110.582500(6, 31356399)6313563993
\n", "

100 rows × 8 columns

\n", "
" ], "text/plain": [ " auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n", "0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n", "1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n", "2 0.865149 519 133.375954 0.896410 (2, 208248389) 2 208248389 \n", "5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n", "6 0.954091 520 121.372849 0.948889 (17, 7673802) 17 7673802 \n", "3 0.991546 457 38.312977 0.507767 (14, 32092134) 14 32092134 \n", "7 0.738417 0 0.001931 0.500000 (8, 142877758) 8 142877758 \n", "8 0.989537 91 5.262948 0.503589 (X, 24789042) X 24789042 \n", "9 0.999940 513 210.776718 0.996541 (1, 109690516) 1 109690516 \n", "10 0.975154 496 59.135496 0.999370 (17, 7674872) 17 7674872 \n", "11 0.684925 286 16.242366 0.613307 (6, 29944050) 6 29944050 \n", "13 0.998226 524 1291.374046 0.694932 (X, 24788994) X 24788994 \n", "14 0.994127 431 36.320537 0.621622 (12, 6018369) 12 6018369 \n", "17 0.779858 509 107.205374 0.765568 (17, 7675088) 17 7675088 \n", "15 0.895074 520 124.636711 0.929366 (17, 7674220) 17 7674220 \n", "16 0.850000 510 91.690840 0.998394 (17, 7675076) 17 7675076 \n", "18 0.924295 502 79.051527 0.615436 (12, 6018901) 12 6018901 \n", "19 0.967688 524 950.933206 0.671795 (6, 31271836) 6 31271836 \n", "20 0.524973 502 1370.792233 0.456735 (6, 31356729) 6 31356729 \n", "21 0.966475 1 0.392720 0.500000 (7, 117548682) 7 117548682 \n", "22 0.843596 522 2902.068702 0.470996 (6, 29943406) 6 29943406 \n", "23 0.624032 480 2319.403475 0.517632 (6, 29943422) 6 29943422 \n", "24 1.000000 520 123.956023 1.000000 (17, 7674221) 17 7674221 \n", "25 0.976915 3 0.319915 0.498495 (7, 142750675) 7 142750675 \n", "31 0.999499 8 2.092742 0.781609 (12, 2685853) 12 2685853 \n", "33 0.867048 315 18.980583 0.640445 (6, 29944118) 6 29944118 \n", "32 0.961239 3 0.492366 0.479700 (4, 144120554) 4 144120554 \n", "30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n", "28 0.501689 472 84.025794 0.500000 (9, 128257486) 9 
128257486 \n", "27 0.777019 518 121.843511 0.875674 (17, 7673776) 17 7673776 \n", ".. ... ... ... ... ... .. ... \n", "92 1.000000 517 117.526718 1.000000 (17, 7674256) 17 7674256 \n", "93 0.493328 512 82.120229 0.692187 (17, 7673704) 17 7673704 \n", "97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n", "95 0.666459 523 986.114504 0.500538 (8, 100709671) 8 100709671 \n", "96 0.951125 0 0.047244 0.509217 (3, 183959847) 3 183959847 \n", "86 0.224100 13 1.728489 0.588009 (3, 75630794) 3 75630794 \n", "98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n", "99 0.870781 495 23.572519 0.462571 (22, 24627926) 22 24627926 \n", "100 0.754902 0 0.001957 0.500000 (20, 8788776) 20 8788776 \n", "101 0.289047 205 13.273622 0.551120 (6, 31271875) 6 31271875 \n", "102 0.318593 524 977.551527 0.504434 (6, 31271839) 6 31271839 \n", "87 0.549532 103 7.149510 0.523889 (3, 49686483) 3 49686483 \n", "83 0.965023 510 2437.399610 0.856497 (6, 29942858) 6 29942858 \n", "85 0.756602 29 2.614504 0.669605 (3, 75630855) 3 75630855 \n", "74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n", "68 0.380695 512 2479.959924 0.497330 (6, 29944132) 6 29944132 \n", "69 0.069974 516 2769.652672 0.599747 (6, 29944135) 6 29944135 \n", "70 0.002137 16 2.042345 0.495708 (1, 237591774) 1 237591774 \n", "71 0.808434 524 3416.650763 0.442027 (6, 29944376) 6 29944376 \n", "72 0.232668 524 3618.646947 0.603043 (6, 29944151) 6 29944151 \n", "73 0.741291 506 315.198473 0.621487 (6, 32664883) 6 32664883 \n", "75 0.863203 395 31.936902 0.488386 (10, 4999206) 10 4999206 \n", "84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n", "76 0.815908 493 215.211832 0.928361 (6, 32661393) 6 32661393 \n", "77 0.380747 480 191.682692 0.596527 (6, 32661384) 6 32661384 \n", "78 0.778990 27 2.581081 0.645061 (6, 32661333) 6 32661333 \n", "79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n", "81 0.993934 523 1127.133588 0.582806 (16, 2106849) 16 2106849 \n", "82 0.995522 
517 1391.395753 0.989221 (6, 29942795) 6 29942795 \n", "103 0.953258 522 2469.611111 0.582500 (6, 31356399) 6 31356399 \n", "\n", " tcga_wxs_count \n", "0 371 \n", "1 59 \n", "2 38 \n", "5 15 \n", "6 15 \n", "3 14 \n", "7 11 \n", "8 11 \n", "9 10 \n", "10 10 \n", "11 9 \n", "13 9 \n", "14 9 \n", "17 8 \n", "15 8 \n", "16 8 \n", "18 7 \n", "19 7 \n", "20 7 \n", "21 7 \n", "22 7 \n", "23 7 \n", "24 7 \n", "25 7 \n", "31 6 \n", "33 6 \n", "32 6 \n", "30 6 \n", "28 6 \n", "27 6 \n", ".. ... \n", "92 3 \n", "93 3 \n", "97 3 \n", "95 3 \n", "96 3 \n", "86 3 \n", "98 3 \n", "99 3 \n", "100 3 \n", "101 3 \n", "102 3 \n", "87 3 \n", "83 3 \n", "85 3 \n", "74 3 \n", "68 3 \n", "69 3 \n", "70 3 \n", "71 3 \n", "72 3 \n", "73 3 \n", "75 3 \n", "84 3 \n", "76 3 \n", "77 3 \n", "78 3 \n", "79 3 \n", "81 3 \n", "82 3 \n", "103 3 \n", "\n", "[100 rows x 8 columns]" ] }, "execution_count": 171, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top100GeneDf.sort_values('tcga_wxs_count',ascending=False)" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "# dbSNP writes GENEINFO as SYMBOL:GeneID pairs separated by '|'. Capture the first symbol up to the colon.\n", "# The earlier word-character pattern stopped at '-', so hyphenated symbols were cut short\n", "# (the stored output shows a bare 'HLA' for many chromosome 6 sites).\n", "vcfDf['GeneName']=vcfDf.Annot.str.extract(r'GENEINFO=([^:|;]+)',expand=False)" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [], "source": [ "# One gene name per (Chr, Pos); the first record wins when a position repeats.\n", "posToGeneNameS=vcfDf.drop_duplicates(['Chr','Pos']).set_index(['Chr','Pos'])['GeneName']" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "# Split the vcfIndex pair into lookup keys; str()/int() make them match the dtypes of vcfDf's Chr and Pos.\n", "top100GeneDf['Chr']=top100GeneDf.vcfIndex.apply(lambda L:str(L[0]))\n", "top100GeneDf['Pos']=top100GeneDf.vcfIndex.apply(lambda L:int(L[1]))" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [], "source": [ "# reindex (rather than []) so sites absent from dbSNP get NaN instead of raising KeyError.\n", "top100GeneDf['Gene']=posToGeneNameS.reindex(top100GeneDf.set_index(['Chr','Pos']).index).values" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
auprcrnaseq_nrnaseq_rdrocaucvcfIndexChrPostcga_wxs_countGene
00.998211519133.8225190.984290(2, 208248388)2208248388371IDH1
10.543616520122.0763360.945266(17, 7673803)17767380359TP53
20.865149519133.3759540.896410(2, 208248389)220824838938IDH1
30.99154645738.3129770.507767(14, 32092134)143209213414ARHGAP5
51.000000524353.8416031.000000(15, 90088606)159008860615IDH2
60.954091520121.3728490.948889(17, 7673802)17767380215TP53
70.73841700.0019310.500000(8, 142877758)814287775811CYP11B1
80.989537915.2629480.503589(X, 24789042)X2478904211POLA1
90.999940513210.7767180.996541(1, 109690516)110969051610GSTM1
100.97515449659.1354960.999370(17, 7674872)17767487210TP53
110.68492528616.2423660.613307(6, 29944050)6299440509HLA
130.9982265241291.3740460.694932(X, 24788994)X247889949POLA1
140.99412743136.3205370.621622(12, 6018369)1260183699VWF
150.895074520124.6367110.929366(17, 7674220)1776742208TP53
160.85000051091.6908400.998394(17, 7675076)1776750768TP53
170.779858509107.2053740.765568(17, 7675088)1776750888TP53
180.92429550279.0515270.615436(12, 6018901)1260189017VWF
190.967688524950.9332060.671795(6, 31271836)6312718367HLA
200.5249735021370.7922330.456735(6, 31356729)6313567297MIR6891
210.96647510.3927200.500000(7, 117548682)71175486827CFTR
220.8435965222902.0687020.470996(6, 29943406)6299434067HLA
230.6240324802319.4034750.517632(6, 29943422)6299434227HLA
241.000000520123.9560231.000000(17, 7674221)1776742217TP53
250.97691530.3199150.498495(7, 142750675)71427506757PRSS1
260.8815032129.7900760.500000(7, 152238825)71522388256KMT2C
270.777019518121.8435110.875674(17, 7673776)1776737766TP53
280.50168947284.0257940.500000(9, 128257486)91282574866GOLGA2
290.96214240.2159530.501053(7, 142750600)71427506006PRSS1
300.116652481300.2194660.601301(7, 55165350)7551653506EGFR
310.99949982.0927420.781609(12, 2685853)1226858536CACNA1C
..............................
730.741291506315.1984730.621487(6, 32664883)6326648833HLA
740.026316436162.3067960.784810(6, 32664926)6326649263HLA
750.86320339531.9369020.488386(10, 4999206)1049992063AKR1C2
760.815908493215.2118320.928361(6, 32661393)6326613933HLA
770.380747480191.6826920.596527(6, 32661384)6326613843HLA
780.778990272.5810810.645061(6, 32661333)6326613333HLA
790.00740701.3076920.481203(12, 57099758)12570997583STAT6
810.9939345231127.1335880.582806(16, 2106849)1621068493MIR6511B1
820.9955225171391.3957530.989221(6, 29942795)6299427953HLA
830.9650235102437.3996100.856497(6, 29942858)6299428583HLA
840.0619885242200.0858780.573877(6, 29942916)6299429163HLA
850.756602292.6145040.669605(3, 75630855)3756308553LOC107986102
860.224100131.7284890.588009(3, 75630794)3756307943LOC107986102
870.5495321037.1495100.523889(3, 49686483)3496864833MST1
880.6836125191443.9732820.499179(6, 29943463)6299434633HLA
890.758810353.0599610.520303(6, 29943667)6299436673HLA
900.85811252366.9847330.899413(3, 179234284)31792342843PIK3CA
910.84474251388.1431300.874750(17, 7674888)1776748883TP53
921.000000517117.5267181.000000(17, 7674256)1776742563TP53
930.49332851282.1202290.692187(17, 7673704)1776737043TP53
940.403970553.7862750.490953(17, 21416556)17214165563KCNJ12
950.666459523986.1145040.500538(8, 100709671)81007096713PABPC1
960.95112500.0472440.509217(3, 183959847)31839598473ABCC5
970.67140650194.8541270.794175(17, 31350209)17313502093NF1
980.530668518380.9942750.738304(5, 68295269)5682952693PIK3R1
990.87078149523.5725190.462571(22, 24627926)22246279263GGT1
1000.75490200.0019570.500000(20, 8788776)2087887763PLCB1
1010.28904720513.2736220.551120(6, 31271875)6312718753HLA
1020.318593524977.5515270.504434(6, 31271839)6312718393HLA
1030.9532585222469.6111110.582500(6, 31356399)6313563993MIR6891
\n", "

100 rows × 9 columns

\n", "
" ], "text/plain": [ " auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n", "0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n", "1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n", "2 0.865149 519 133.375954 0.896410 (2, 208248389) 2 208248389 \n", "3 0.991546 457 38.312977 0.507767 (14, 32092134) 14 32092134 \n", "5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n", "6 0.954091 520 121.372849 0.948889 (17, 7673802) 17 7673802 \n", "7 0.738417 0 0.001931 0.500000 (8, 142877758) 8 142877758 \n", "8 0.989537 91 5.262948 0.503589 (X, 24789042) X 24789042 \n", "9 0.999940 513 210.776718 0.996541 (1, 109690516) 1 109690516 \n", "10 0.975154 496 59.135496 0.999370 (17, 7674872) 17 7674872 \n", "11 0.684925 286 16.242366 0.613307 (6, 29944050) 6 29944050 \n", "13 0.998226 524 1291.374046 0.694932 (X, 24788994) X 24788994 \n", "14 0.994127 431 36.320537 0.621622 (12, 6018369) 12 6018369 \n", "15 0.895074 520 124.636711 0.929366 (17, 7674220) 17 7674220 \n", "16 0.850000 510 91.690840 0.998394 (17, 7675076) 17 7675076 \n", "17 0.779858 509 107.205374 0.765568 (17, 7675088) 17 7675088 \n", "18 0.924295 502 79.051527 0.615436 (12, 6018901) 12 6018901 \n", "19 0.967688 524 950.933206 0.671795 (6, 31271836) 6 31271836 \n", "20 0.524973 502 1370.792233 0.456735 (6, 31356729) 6 31356729 \n", "21 0.966475 1 0.392720 0.500000 (7, 117548682) 7 117548682 \n", "22 0.843596 522 2902.068702 0.470996 (6, 29943406) 6 29943406 \n", "23 0.624032 480 2319.403475 0.517632 (6, 29943422) 6 29943422 \n", "24 1.000000 520 123.956023 1.000000 (17, 7674221) 17 7674221 \n", "25 0.976915 3 0.319915 0.498495 (7, 142750675) 7 142750675 \n", "26 0.881503 212 9.790076 0.500000 (7, 152238825) 7 152238825 \n", "27 0.777019 518 121.843511 0.875674 (17, 7673776) 17 7673776 \n", "28 0.501689 472 84.025794 0.500000 (9, 128257486) 9 128257486 \n", "29 0.962142 4 0.215953 0.501053 (7, 142750600) 7 142750600 \n", "30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 
55165350 \n", "31 0.999499 8 2.092742 0.781609 (12, 2685853) 12 2685853 \n", ".. ... ... ... ... ... .. ... \n", "73 0.741291 506 315.198473 0.621487 (6, 32664883) 6 32664883 \n", "74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n", "75 0.863203 395 31.936902 0.488386 (10, 4999206) 10 4999206 \n", "76 0.815908 493 215.211832 0.928361 (6, 32661393) 6 32661393 \n", "77 0.380747 480 191.682692 0.596527 (6, 32661384) 6 32661384 \n", "78 0.778990 27 2.581081 0.645061 (6, 32661333) 6 32661333 \n", "79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n", "81 0.993934 523 1127.133588 0.582806 (16, 2106849) 16 2106849 \n", "82 0.995522 517 1391.395753 0.989221 (6, 29942795) 6 29942795 \n", "83 0.965023 510 2437.399610 0.856497 (6, 29942858) 6 29942858 \n", "84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n", "85 0.756602 29 2.614504 0.669605 (3, 75630855) 3 75630855 \n", "86 0.224100 13 1.728489 0.588009 (3, 75630794) 3 75630794 \n", "87 0.549532 103 7.149510 0.523889 (3, 49686483) 3 49686483 \n", "88 0.683612 519 1443.973282 0.499179 (6, 29943463) 6 29943463 \n", "89 0.758810 35 3.059961 0.520303 (6, 29943667) 6 29943667 \n", "90 0.858112 523 66.984733 0.899413 (3, 179234284) 3 179234284 \n", "91 0.844742 513 88.143130 0.874750 (17, 7674888) 17 7674888 \n", "92 1.000000 517 117.526718 1.000000 (17, 7674256) 17 7674256 \n", "93 0.493328 512 82.120229 0.692187 (17, 7673704) 17 7673704 \n", "94 0.403970 55 3.786275 0.490953 (17, 21416556) 17 21416556 \n", "95 0.666459 523 986.114504 0.500538 (8, 100709671) 8 100709671 \n", "96 0.951125 0 0.047244 0.509217 (3, 183959847) 3 183959847 \n", "97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n", "98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n", "99 0.870781 495 23.572519 0.462571 (22, 24627926) 22 24627926 \n", "100 0.754902 0 0.001957 0.500000 (20, 8788776) 20 8788776 \n", "101 0.289047 205 13.273622 0.551120 (6, 31271875) 6 31271875 \n", "102 0.318593 524 
977.551527 0.504434 (6, 31271839) 6 31271839 \n", "103 0.953258 522 2469.611111 0.582500 (6, 31356399) 6 31356399 \n", "\n", " tcga_wxs_count Gene \n", "0 371 IDH1 \n", "1 59 TP53 \n", "2 38 IDH1 \n", "3 14 ARHGAP5 \n", "5 15 IDH2 \n", "6 15 TP53 \n", "7 11 CYP11B1 \n", "8 11 POLA1 \n", "9 10 GSTM1 \n", "10 10 TP53 \n", "11 9 HLA \n", "13 9 POLA1 \n", "14 9 VWF \n", "15 8 TP53 \n", "16 8 TP53 \n", "17 8 TP53 \n", "18 7 VWF \n", "19 7 HLA \n", "20 7 MIR6891 \n", "21 7 CFTR \n", "22 7 HLA \n", "23 7 HLA \n", "24 7 TP53 \n", "25 7 PRSS1 \n", "26 6 KMT2C \n", "27 6 TP53 \n", "28 6 GOLGA2 \n", "29 6 PRSS1 \n", "30 6 EGFR \n", "31 6 CACNA1C \n", ".. ... ... \n", "73 3 HLA \n", "74 3 HLA \n", "75 3 AKR1C2 \n", "76 3 HLA \n", "77 3 HLA \n", "78 3 HLA \n", "79 3 STAT6 \n", "81 3 MIR6511B1 \n", "82 3 HLA \n", "83 3 HLA \n", "84 3 HLA \n", "85 3 LOC107986102 \n", "86 3 LOC107986102 \n", "87 3 MST1 \n", "88 3 HLA \n", "89 3 HLA \n", "90 3 PIK3CA \n", "91 3 TP53 \n", "92 3 TP53 \n", "93 3 TP53 \n", "94 3 KCNJ12 \n", "95 3 PABPC1 \n", "96 3 ABCC5 \n", "97 3 NF1 \n", "98 3 PIK3R1 \n", "99 3 GGT1 \n", "100 3 PLCB1 \n", "101 3 HLA \n", "102 3 HLA \n", "103 3 MIR6891 \n", "\n", "[100 rows x 9 columns]" ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top100GeneDf" ] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [], "source": [ "geneDf=top100GeneDf#.groupby('Gene')#.head(n=1)" ] }, { "cell_type": "code", "execution_count": 138, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
auprcrnaseq_nrnaseq_rdrocaucvcfIndexChrPostcga_wxs_countGeneClassification
00.998211519133.8225190.984290(2, 208248388)2208248388371IDH1Oncogene
10.543616520122.0763360.945266(17, 7673803)17767380359TP53TSG
20.865149519133.3759540.896410(2, 208248389)220824838938IDH1Oncogene
30.99154645738.3129770.507767(14, 32092134)143209213414ARHGAP5NaN
51.000000524353.8416031.000000(15, 90088606)159008860615IDH2Oncogene
60.954091520121.3728490.948889(17, 7673802)17767380215TP53TSG
70.73841700.0019310.500000(8, 142877758)814287775811CYP11B1NaN
80.989537915.2629480.503589(X, 24789042)X2478904211POLA1NaN
90.999940513210.7767180.996541(1, 109690516)110969051610GSTM1NaN
100.97515449659.1354960.999370(17, 7674872)17767487210TP53TSG
110.68492528616.2423660.613307(6, 29944050)6299440509HLANaN
130.9982265241291.3740460.694932(X, 24788994)X247889949POLA1NaN
140.99412743136.3205370.621622(12, 6018369)1260183699VWFNaN
150.895074520124.6367110.929366(17, 7674220)1776742208TP53TSG
160.85000051091.6908400.998394(17, 7675076)1776750768TP53TSG
170.779858509107.2053740.765568(17, 7675088)1776750888TP53TSG
180.92429550279.0515270.615436(12, 6018901)1260189017VWFNaN
190.967688524950.9332060.671795(6, 31271836)6312718367HLANaN
200.5249735021370.7922330.456735(6, 31356729)6313567297MIR6891NaN
210.96647510.3927200.500000(7, 117548682)71175486827CFTRNaN
220.8435965222902.0687020.470996(6, 29943406)6299434067HLANaN
230.6240324802319.4034750.517632(6, 29943422)6299434227HLANaN
241.000000520123.9560231.000000(17, 7674221)1776742217TP53TSG
250.97691530.3199150.498495(7, 142750675)71427506757PRSS1NaN
260.8815032129.7900760.500000(7, 152238825)71522388256KMT2CNaN
270.777019518121.8435110.875674(17, 7673776)1776737766TP53TSG
280.50168947284.0257940.500000(9, 128257486)91282574866GOLGA2NaN
290.96214240.2159530.501053(7, 142750600)71427506006PRSS1NaN
300.116652481300.2194660.601301(7, 55165350)7551653506EGFROncogene
310.99949982.0927420.781609(12, 2685853)1226858536CACNA1CNaN
.................................
730.741291506315.1984730.621487(6, 32664883)6326648833HLANaN
740.026316436162.3067960.784810(6, 32664926)6326649263HLANaN
750.86320339531.9369020.488386(10, 4999206)1049992063AKR1C2NaN
760.815908493215.2118320.928361(6, 32661393)6326613933HLANaN
770.380747480191.6826920.596527(6, 32661384)6326613843HLANaN
780.778990272.5810810.645061(6, 32661333)6326613333HLANaN
790.00740701.3076920.481203(12, 57099758)12570997583STAT6NaN
810.9939345231127.1335880.582806(16, 2106849)1621068493MIR6511B1NaN
820.9955225171391.3957530.989221(6, 29942795)6299427953HLANaN
830.9650235102437.3996100.856497(6, 29942858)6299428583HLANaN
840.0619885242200.0858780.573877(6, 29942916)6299429163HLANaN
850.756602292.6145040.669605(3, 75630855)3756308553LOC107986102NaN
860.224100131.7284890.588009(3, 75630794)3756307943LOC107986102NaN
870.5495321037.1495100.523889(3, 49686483)3496864833MST1NaN
880.6836125191443.9732820.499179(6, 29943463)6299434633HLANaN
890.758810353.0599610.520303(6, 29943667)6299436673HLANaN
900.85811252366.9847330.899413(3, 179234284)31792342843PIK3CAOncogene
910.84474251388.1431300.874750(17, 7674888)1776748883TP53TSG
921.000000517117.5267181.000000(17, 7674256)1776742563TP53TSG
930.49332851282.1202290.692187(17, 7673704)1776737043TP53TSG
940.403970553.7862750.490953(17, 21416556)17214165563KCNJ12NaN
950.666459523986.1145040.500538(8, 100709671)81007096713PABPC1NaN
960.95112500.0472440.509217(3, 183959847)31839598473ABCC5NaN
970.67140650194.8541270.794175(17, 31350209)17313502093NF1TSG
980.530668518380.9942750.738304(5, 68295269)5682952693PIK3R1TSG
990.87078149523.5725190.462571(22, 24627926)22246279263GGT1NaN
1000.75490200.0019570.500000(20, 8788776)2087887763PLCB1NaN
1010.28904720513.2736220.551120(6, 31271875)6312718753HLANaN
1020.318593524977.5515270.504434(6, 31271839)6312718393HLANaN
1030.9532585222469.6111110.582500(6, 31356399)6313563993MIR6891NaN
\n", "

100 rows × 10 columns

\n", "
" ], "text/plain": [ " auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n", "0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n", "1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n", "2 0.865149 519 133.375954 0.896410 (2, 208248389) 2 208248389 \n", "3 0.991546 457 38.312977 0.507767 (14, 32092134) 14 32092134 \n", "5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n", "6 0.954091 520 121.372849 0.948889 (17, 7673802) 17 7673802 \n", "7 0.738417 0 0.001931 0.500000 (8, 142877758) 8 142877758 \n", "8 0.989537 91 5.262948 0.503589 (X, 24789042) X 24789042 \n", "9 0.999940 513 210.776718 0.996541 (1, 109690516) 1 109690516 \n", "10 0.975154 496 59.135496 0.999370 (17, 7674872) 17 7674872 \n", "11 0.684925 286 16.242366 0.613307 (6, 29944050) 6 29944050 \n", "13 0.998226 524 1291.374046 0.694932 (X, 24788994) X 24788994 \n", "14 0.994127 431 36.320537 0.621622 (12, 6018369) 12 6018369 \n", "15 0.895074 520 124.636711 0.929366 (17, 7674220) 17 7674220 \n", "16 0.850000 510 91.690840 0.998394 (17, 7675076) 17 7675076 \n", "17 0.779858 509 107.205374 0.765568 (17, 7675088) 17 7675088 \n", "18 0.924295 502 79.051527 0.615436 (12, 6018901) 12 6018901 \n", "19 0.967688 524 950.933206 0.671795 (6, 31271836) 6 31271836 \n", "20 0.524973 502 1370.792233 0.456735 (6, 31356729) 6 31356729 \n", "21 0.966475 1 0.392720 0.500000 (7, 117548682) 7 117548682 \n", "22 0.843596 522 2902.068702 0.470996 (6, 29943406) 6 29943406 \n", "23 0.624032 480 2319.403475 0.517632 (6, 29943422) 6 29943422 \n", "24 1.000000 520 123.956023 1.000000 (17, 7674221) 17 7674221 \n", "25 0.976915 3 0.319915 0.498495 (7, 142750675) 7 142750675 \n", "26 0.881503 212 9.790076 0.500000 (7, 152238825) 7 152238825 \n", "27 0.777019 518 121.843511 0.875674 (17, 7673776) 17 7673776 \n", "28 0.501689 472 84.025794 0.500000 (9, 128257486) 9 128257486 \n", "29 0.962142 4 0.215953 0.501053 (7, 142750600) 7 142750600 \n", "30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 
55165350 \n", "31 0.999499 8 2.092742 0.781609 (12, 2685853) 12 2685853 \n", ".. ... ... ... ... ... .. ... \n", "73 0.741291 506 315.198473 0.621487 (6, 32664883) 6 32664883 \n", "74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n", "75 0.863203 395 31.936902 0.488386 (10, 4999206) 10 4999206 \n", "76 0.815908 493 215.211832 0.928361 (6, 32661393) 6 32661393 \n", "77 0.380747 480 191.682692 0.596527 (6, 32661384) 6 32661384 \n", "78 0.778990 27 2.581081 0.645061 (6, 32661333) 6 32661333 \n", "79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n", "81 0.993934 523 1127.133588 0.582806 (16, 2106849) 16 2106849 \n", "82 0.995522 517 1391.395753 0.989221 (6, 29942795) 6 29942795 \n", "83 0.965023 510 2437.399610 0.856497 (6, 29942858) 6 29942858 \n", "84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n", "85 0.756602 29 2.614504 0.669605 (3, 75630855) 3 75630855 \n", "86 0.224100 13 1.728489 0.588009 (3, 75630794) 3 75630794 \n", "87 0.549532 103 7.149510 0.523889 (3, 49686483) 3 49686483 \n", "88 0.683612 519 1443.973282 0.499179 (6, 29943463) 6 29943463 \n", "89 0.758810 35 3.059961 0.520303 (6, 29943667) 6 29943667 \n", "90 0.858112 523 66.984733 0.899413 (3, 179234284) 3 179234284 \n", "91 0.844742 513 88.143130 0.874750 (17, 7674888) 17 7674888 \n", "92 1.000000 517 117.526718 1.000000 (17, 7674256) 17 7674256 \n", "93 0.493328 512 82.120229 0.692187 (17, 7673704) 17 7673704 \n", "94 0.403970 55 3.786275 0.490953 (17, 21416556) 17 21416556 \n", "95 0.666459 523 986.114504 0.500538 (8, 100709671) 8 100709671 \n", "96 0.951125 0 0.047244 0.509217 (3, 183959847) 3 183959847 \n", "97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n", "98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n", "99 0.870781 495 23.572519 0.462571 (22, 24627926) 22 24627926 \n", "100 0.754902 0 0.001957 0.500000 (20, 8788776) 20 8788776 \n", "101 0.289047 205 13.273622 0.551120 (6, 31271875) 6 31271875 \n", "102 0.318593 524 
977.551527 0.504434 (6, 31271839) 6 31271839 \n", "103 0.953258 522 2469.611111 0.582500 (6, 31356399) 6 31356399 \n", "\n", " tcga_wxs_count Gene Classification \n", "0 371 IDH1 Oncogene \n", "1 59 TP53 TSG \n", "2 38 IDH1 Oncogene \n", "3 14 ARHGAP5 NaN \n", "5 15 IDH2 Oncogene \n", "6 15 TP53 TSG \n", "7 11 CYP11B1 NaN \n", "8 11 POLA1 NaN \n", "9 10 GSTM1 NaN \n", "10 10 TP53 TSG \n", "11 9 HLA NaN \n", "13 9 POLA1 NaN \n", "14 9 VWF NaN \n", "15 8 TP53 TSG \n", "16 8 TP53 TSG \n", "17 8 TP53 TSG \n", "18 7 VWF NaN \n", "19 7 HLA NaN \n", "20 7 MIR6891 NaN \n", "21 7 CFTR NaN \n", "22 7 HLA NaN \n", "23 7 HLA NaN \n", "24 7 TP53 TSG \n", "25 7 PRSS1 NaN \n", "26 6 KMT2C NaN \n", "27 6 TP53 TSG \n", "28 6 GOLGA2 NaN \n", "29 6 PRSS1 NaN \n", "30 6 EGFR Oncogene \n", "31 6 CACNA1C NaN \n", ".. ... ... ... \n", "73 3 HLA NaN \n", "74 3 HLA NaN \n", "75 3 AKR1C2 NaN \n", "76 3 HLA NaN \n", "77 3 HLA NaN \n", "78 3 HLA NaN \n", "79 3 STAT6 NaN \n", "81 3 MIR6511B1 NaN \n", "82 3 HLA NaN \n", "83 3 HLA NaN \n", "84 3 HLA NaN \n", "85 3 LOC107986102 NaN \n", "86 3 LOC107986102 NaN \n", "87 3 MST1 NaN \n", "88 3 HLA NaN \n", "89 3 HLA NaN \n", "90 3 PIK3CA Oncogene \n", "91 3 TP53 TSG \n", "92 3 TP53 TSG \n", "93 3 TP53 TSG \n", "94 3 KCNJ12 NaN \n", "95 3 PABPC1 NaN \n", "96 3 ABCC5 NaN \n", "97 3 NF1 TSG \n", "98 3 PIK3R1 TSG \n", "99 3 GGT1 NaN \n", "100 3 PLCB1 NaN \n", "101 3 HLA NaN \n", "102 3 HLA NaN \n", "103 3 MIR6891 NaN \n", "\n", "[100 rows x 10 columns]" ] }, "execution_count": 138, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top100GeneDf" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [], "source": [ "dbsnpFlagDf=pd.read_csv('./Data/oncogene_ts.tsv',sep='\\t')" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [], "source": [ "geneToStatus=dbsnpFlagDf.set_index(['Gene Symbol'])['Classification*']" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, 
"outputs": [], "source": [ "# Attach each site's Classification from geneToStatus here instead of relying on a column that a later cell adds to geneDf,\n", "# so this cell also works after Restart Kernel -> Run All. Sites whose gene has no Classification are dropped.\n", "# NOTE(review): reindex() needs unique 'Gene Symbol' labels in geneToStatus - confirm for the TSV.\n", "withClassificationStatDf=geneDf.assign(Classification=geneToStatus.reindex(geneDf['Gene']).values).dropna(subset=['Classification'])" ] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [], "source": [ "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 137, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
auprcrnaseq_nrnaseq_rdrocaucvcfIndexChrPostcga_wxs_countGeneClassification
00.998211519133.8225190.984290(2, 208248388)2208248388371IDH1Oncogene
10.543616520122.0763360.945266(17, 7673803)17767380359TP53TSG
20.865149519133.3759540.896410(2, 208248389)220824838938IDH1Oncogene
51.000000524353.8416031.000000(15, 90088606)159008860615IDH2Oncogene
60.954091520121.3728490.948889(17, 7673802)17767380215TP53TSG
100.97515449659.1354960.999370(17, 7674872)17767487210TP53TSG
150.895074520124.6367110.929366(17, 7674220)1776742208TP53TSG
160.85000051091.6908400.998394(17, 7675076)1776750768TP53TSG
170.779858509107.2053740.765568(17, 7675088)1776750888TP53TSG
241.000000520123.9560231.000000(17, 7674221)1776742217TP53TSG
270.777019518121.8435110.875674(17, 7673776)1776737766TP53TSG
300.116652481300.2194660.601301(7, 55165350)7551653506EGFROncogene
340.831944519120.8053440.871324(17, 7674230)1776742305TP53TSG
350.79572551399.9675570.869141(17, 7674945)1776749455TP53TSG
370.718190464182.2767180.845446(7, 55154129)7551541295EGFROncogene
530.50477123710.7748090.500000(3, 179199690)31791996904PIK3CAOncogene
671.000000516115.8530531.000000(17, 7676044)1776760444TP53TSG
900.85811252366.9847330.899413(3, 179234284)31792342843PIK3CAOncogene
910.84474251388.1431300.874750(17, 7674888)1776748883TP53TSG
921.000000517117.5267181.000000(17, 7674256)1776742563TP53TSG
930.49332851282.1202290.692187(17, 7673704)1776737043TP53TSG
970.67140650194.8541270.794175(17, 31350209)17313502093NF1TSG
980.530668518380.9942750.738304(5, 68295269)5682952693PIK3R1TSG
\n", "
" ], "text/plain": [ " auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n", "0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n", "1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n", "2 0.865149 519 133.375954 0.896410 (2, 208248389) 2 208248389 \n", "5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n", "6 0.954091 520 121.372849 0.948889 (17, 7673802) 17 7673802 \n", "10 0.975154 496 59.135496 0.999370 (17, 7674872) 17 7674872 \n", "15 0.895074 520 124.636711 0.929366 (17, 7674220) 17 7674220 \n", "16 0.850000 510 91.690840 0.998394 (17, 7675076) 17 7675076 \n", "17 0.779858 509 107.205374 0.765568 (17, 7675088) 17 7675088 \n", "24 1.000000 520 123.956023 1.000000 (17, 7674221) 17 7674221 \n", "27 0.777019 518 121.843511 0.875674 (17, 7673776) 17 7673776 \n", "30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n", "34 0.831944 519 120.805344 0.871324 (17, 7674230) 17 7674230 \n", "35 0.795725 513 99.967557 0.869141 (17, 7674945) 17 7674945 \n", "37 0.718190 464 182.276718 0.845446 (7, 55154129) 7 55154129 \n", "53 0.504771 237 10.774809 0.500000 (3, 179199690) 3 179199690 \n", "67 1.000000 516 115.853053 1.000000 (17, 7676044) 17 7676044 \n", "90 0.858112 523 66.984733 0.899413 (3, 179234284) 3 179234284 \n", "91 0.844742 513 88.143130 0.874750 (17, 7674888) 17 7674888 \n", "92 1.000000 517 117.526718 1.000000 (17, 7674256) 17 7674256 \n", "93 0.493328 512 82.120229 0.692187 (17, 7673704) 17 7673704 \n", "97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n", "98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n", "\n", " tcga_wxs_count Gene Classification \n", "0 371 IDH1 Oncogene \n", "1 59 TP53 TSG \n", "2 38 IDH1 Oncogene \n", "5 15 IDH2 Oncogene \n", "6 15 TP53 TSG \n", "10 10 TP53 TSG \n", "15 8 TP53 TSG \n", "16 8 TP53 TSG \n", "17 8 TP53 TSG \n", "24 7 TP53 TSG \n", "27 6 TP53 TSG \n", "30 6 EGFR Oncogene \n", "34 5 TP53 TSG \n", "35 5 TP53 TSG \n", "37 5 EGFR Oncogene \n", "53 4 
PIK3CA Oncogene \n", "67 4 TP53 TSG \n", "90 3 PIK3CA Oncogene \n", "91 3 TP53 TSG \n", "92 3 TP53 TSG \n", "93 3 TP53 TSG \n", "97 3 NF1 TSG \n", "98 3 PIK3R1 TSG " ] }, "execution_count": 137, "metadata": {}, "output_type": "execute_result" } ], "source": [ "withClassificationStatDf" ] }, { "cell_type": "code", "execution_count": 136, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0.998211\n", "2 0.865149\n", "5 1.000000\n", "30 0.116652\n", "37 0.718190\n", "53 0.504771\n", "90 0.858112\n", "Name: auprc, dtype: float64" ] }, "execution_count": 136, "metadata": {}, "output_type": "execute_result" } ], "source": [ "g=withClassificationStatDf.groupby('Classification')['auprc']\n", "\n", "g.get_group('Oncogene')" ] }, { "cell_type": "code", "execution_count": 131, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAFH5JREFUeJzt3XuUXeV93vHvw8iAMJcESSHOgBCp5MQKdoAI146bGhzhJVgJlAbHJk4htmOSNgjVcd1lOy6hJPFKSNpUYBwMNEakXib4UkfLVbgYm+C2hiIu5mYTTwgXCWqETLlYXCzx6x9nz2YYRtIR1tY5mvl+1pq1Zr/7PXv/ZnQ0z3n35d2pKiRJAthj0AVIkoaHoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqTWrEEXsKPmzp1bCxYsGHQZkrRbueWWWx6rqnnb67fbhcKCBQtYu3btoMuQpN1Kkgf66efhI0lSy1CQJLUMBUlSy1CQJLUMBUlSq7NQSPKXSR5NctdW1ifJ+UnGktyR5KiuapEk9afLkcJlwLJtrD8eWNR8nQH8RYe1SJL60Nl9ClV1Q5IF2+hyEnB59Z4HemOSH0nymqp6pKuahsUFF1zA2NjYoMtg/fr1AIyOjg60joULF7J8+fKB1qAXDcP7c1jemzDz3p+DvHltFHhowvK6pu1loZDkDHqjCebPn79LipsJnnnmmUGXIE3J9+bgDDIUMkVbTdWxqi4GLgZYsmTJlH12J8PyqWPFihUArFy5csCVaJgMw/vT9+bgDPLqo3XAIROWDwYeHlAtkiQGGwqrgdOaq5DeBDwxE84nSNIw6+zwUZLPAscAc5OsA34feBVAVV0ErAFOAMaATcB7uqpFktSfLq8+OnU76wv4na72L0nacd7RLElqGQqSpNZu95AdaboahpvGhsX472H80tSZblfeQGcoSENibGyM79x9G/P33TLoUgZuzx/0DmI894BPWXzw6ZFduj9DQRoi8/fdwkePenLQZWiIfPz
W/Xfp/jynIElqGQqSpJahIElqGQqSpNaMO9HsZX8v8rK/l5pp8+ZLU5lxoTA2Nsbtd32LLfscOOhSBm6P53uzkN9y33cHXMngjWz63qBLkIbCjAsFgC37HMgzP33CoMvQEJn97TWDLkEaCp5TkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1ZtwsqevXr2dk0xPOiqmXGNm0kfXrNw+6DGngHClIklozbqQwOjrK/31uls9T0EvM/vYaRkcPGnQZ0sA5UpAktWbcSEEaVuvXr+f7T43w8Vv3H3QpGiIPPDXCq9ev32X7c6QgSWo5UpCGxOjoKM9tfoSPHvXkoEvREPn4rfuz1+joLtufIwVJUstQkCS1DAVJUqvTUEiyLMm9ScaSfHiK9fOTfC3JbUnuSOLNA5I0QJ2FQpIR4ELgeGAxcGqSxZO6fQy4sqqOBN4FfLKreiRJ29flSOGNwFhV3VdVzwNXACdN6lPA+EXZBwAPd1iPJGk7urwkdRR4aMLyOuCfTupzDnBNkuXAq4GlHdYjSdqOLkcKmaKtJi2fClxWVQcDJwB/leRlNSU5I8naJGs3bNjQQamSJOg2FNYBh0xYPpiXHx56H3AlQFV9A9gbmDt5Q1V1cVUtqaol8+bN66hcSVKXoXAzsCjJYUn2pHciefWkPg8CvwiQ5HX0QsGhgCQNSGehUFWbgTOBq4Fv0bvK6O4k5yY5sen2QeD9Sb4JfBb4jaqafIhJkrSLdDr3UVWtAdZMajt7wvf3AG/psgZJUv+8o1mS1DIUJEktQ0GS1DIUJEktQ0GS1PLJa9IQefBpn9EM8N1Nvc+rB+3zwoArGbwHnx5h0S7cn6EgDYmFCxcOuoSh8fzYGAB7HervZBG79r1hKEhDYvny5YMuYWisWLECgJUrVw64kpnHcwqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqzepy40mWASuBEeDSqvrjKfr8KnAOUMA3q+rXuqwJYGTT95j97TVd72bo7fHskwC8sPf+A65k8EY2fQ84aNBlSAPXWSgkGQEuBI4D1gE3J1ldVfdM6LMI+Ajwlqp6PMmPdVXPuIULF3a9i93G2NhTACz8Sf8YwkG+NyT6DIUkHwfOq6r/1yz/KPDBqvrYNl72RmCsqu5rXnMFcBJwz4Q+7wcurKrHAarq0R3/EXbM8uXLu97FbmPFihUArFy5csCVSBoW/Z5TOH48EACaP+InbOc1o8BDE5bXNW0TvRZ4bZL/leTG5nCTJGlA+j18NJJkr6p6DiDJbGCv7bwmU7TVFPtfBBwDHAx8PcnhEwOo2d8ZwBkA8+fP77NkSdKO6nek8N+A65K8L8l7gWuBVdt5zTrgkAnLBwMPT9Hnb6rqB1X1j8C99ELiJarq4qpaUlVL5s2b12fJkqQd1VcoVNV5wB8CrwN+BviDpm1bbgYWJTksyZ7Au4DVk/p8CTgWIMlceoeT7uu/fEnSzrTdw0fNVURXV9VS4Kp+N1xVm5OcCVxN75LUv6yqu5OcC6ytqtXNurcnuQfYAnyoqja+kh9EkvTD224oVNWWJJuSHFBVT+zIxqtqDbBmUtvZE74v4HebL0nSgPV7ovlZ4M4k1wLfH2+sqrM6qUqSNBD9hsL/aL4kSdNYX6FQVauak8U/Te+y0nur6vlOK5Mk7XL93tF8AvAp4B/o3X9wWJLfqqq/7bI4SdKu1e/ho/8MHFtVYwBJ/gm9w0mGgiRNI/3evPboeCA07gM6n6dIkrRr9TtSuDvJGuBKeucU3kFv1tN/CVBVX+yoPknSLtRvKOwNfBd4a7O8ATgQ+GV6IWEoSNI00O/
VR+/puhBJ0uD1e/XRp3n5DKdU1Xt3ekWSpIHp9/DRlyd8vzdwMi+f8VSStJvr9/DRFyYuJ/ks8JVOKpIkDUy/l6ROtgjwaTeSNM30e07hKV48p1D0rkT6910VJUkajH4PH+2X5EB6I4S9x5s7q0qSNBD9jhR+E1hB75GatwNvAr4BvK270qavCy64gLGxse137Nh4DStWrBhoHQsXLmT58uUDrUFST7/nFFYARwMPVNWxwJH0bmDTbmz27NnMnj170GVIGiJ9P2Snqp5NQpK9qurbSX6q08qmMT8VSxpW/YbCuiQ/AnwJuDbJ43ifgjQtDcPhzWE5tAkz7/BmvyeaT26+PSfJ14ADgKs6q0rSjOZhzcHpd6TQqqq/66IQScNhJn0q1su90pvXJEnTkKEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKkVqehkGRZknuTjCX58Db6nZKkkizpsh5J0rZ1FgpJRoALgeOBxcCpSRZP0W8/4Czgpq5qkST1p8uRwhuBsaq6r6qeB64ATpqi3x8A5wHPdliLJKkPXYbCKPDQhOV1TVsryZHAIVX15Q7rkCT1qctQyBRt1a5M9gD+HPjgdjeUnJFkbZK1GzZs2IklSpIm6jIU1gGHTFg+GHh4wvJ+wOHA9UnuB94ErJ7qZHNVXVxVS6pqybx58zosWZJmti5D4WZgUZLDkuwJvAtYPb6yqp6oqrlVtaCqFgA3AidW1doOa5IkbUNnoVBVm4EzgauBbwFXVtXdSc5NcmJX+5UkvXKzutx4Va0B1kxqO3srfY/pshZJ0vZ5R7MkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJanYZCkmVJ7k0yluTDU6z/3ST3JLkjyXVJDu2yHknStnUWCklGgAuB44HFwKlJFk/qdhuwpKreAHweOK+reiRJ29flSOGNwFhV3VdVzwNXACdN7FBVX6uqTc3ijcDBHdYjSdqOLkNhFHhowvK6pm1r3gf87VQrkpyRZG2StRs2bNiJJUqSJuoyFDJFW03ZMfl1YAnwp1Otr6qLq2pJVS2ZN2/eTixRkjTRrA63vQ44ZMLywcDDkzslWQr8HvDWqnquw3okSdvR5UjhZmBRksOS7Am8C1g9sUOSI4FPASdW1aMd1iJJ6kNnoVBVm4EzgauBbwFXVtXdSc5NcmLT7U+BfYHPJbk9yeqtbE6StAt0efiIqloDrJnUdvaE75d2uX9J0o7xjmZJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUstQmME2btzIWWedxcaNGwddiqQhYSjMYKtWreLOO+/k8ssvH3QpkoaEoTBDbdy4kauuuoqq4qqrrnK0IAkwFGasVatW8cILLwCwZcsWRwuSAENhxvrKV77C5s2bAdi8eTPXXnvtgCuSNAwMhRlq6dKlzJrVmzl91qxZHHfccQOuSNIwMBRmqNNPP5099uj984+MjHDaaacNuCJJw8BQmKHmzJnDsmXLSMKyZcuYM2fOoEuSNAQ6ffKahtvpp5/O/fff7yhBUstQmMHmzJnD+eefP+gyJA0RDx9JklqGgiSpZShIklqGgiSplaoadA07JMkG4IFB1zGNzAUeG3QR0hR8b+5ch1bVvO112u1CQTtXkrVVtWTQdUiT+d4cDA8fSZJahoIkqWUo6OJBFyBthe/NAfCcgiSp5UhBktRy7qMhl+Rg4EJgMb0Q/zLwoap6fqCFSa9QkjnAdc3ijwNbgA3N8n8HfrVpewH4raq6Kck
s4FzgHcD3m76fq6o/2mWFzxCOFIZYkgBfBL5UVYuA1wL7Av5H0G6rqjZW1RFVdQRwEfDnzff/GlgGHFVVbwCWAg81L/tD4CeA1zd9fwF41a6vfvozFIbb24Bnq+rTAFW1BfgA8N4k/ybJF5NcleQ7Sc4bf1GSZUluTfLNJNc1bQcm+VKSO5LcmOQNTfu8JNc2/T+V5IEkc5t1v57k/yS5vVk30rQ/neSPmu3fmOSgCdv6QpKbm6+37NLflnZ3rwEeq6rnAKrqsap6OMk+wPuB5VX1bLPuqao6Z3ClTl+GwnD7GeCWiQ1V9STwIL1Df0cA7wReD7wzySFJ5gGXAL9SVT9Lb7gN8B+B25pPYB8FLm/afx/4alUdRW/oPh8gyeuabb+l+WS2BXh385pXAzc227+B3n9YgJX0PvUdDfwKcOnO+kVoRrgGOCTJ3yf5ZJK3Nu0LgQer6qkB1jZjeE5huAWY6vKw8fbrquoJgCT3AIcCPwrcUFX/CFBV32te88/o/aGmqr6aZE6SA5r2k5v2q5I83vT/ReDngJt7R7GYDTzarHue3rkN6IXW+AOelwKLm/4A+yfZz//M6kdVPZ3k5+gdGjoW+OskHwZundgvyXuAFcAc4Oer6qGXbUyvmKEw3O6m+UM+Lsn+wCH0Prk/N2HVFnr/ntsKkslqK+3j/VdV1UemWPeDevFa5vH9Qm/k+eaqemYr25S2qTlEej1wfZI7gdOBK4H54x8wmsOpn05yFzAyuGqnJw8fDbfrgH2SnAbQHNP/T8BlwKatvOYbwFuTHNa85sCm/Qaawz9JjqF37PZJ4H/Su9qDJG+nN9IY3/cpSX5sfDtJDt1OvdcAZ44vJDmi3x9USvJTSRZNaDoCeKCqNgH/FfhEkr2bviPAngMoc9ozFIZY82n8ZOAdSb4D/D3wLL1zAlt7zQbgDOCLSb4J/HWz6hxgSZI7gD+m9wkMeuca3p7kVuB44BHgqaq6B/gYcE3zmmvpnQjclrPG99EczvrtHfyRNbPtC6xKck/znltM730L8Hv03pt3JbkN+DqwCnh4EIVOZ97RPMMl2QvYUlWbk7wZ+IvmxLKkGchzCpoPXJlkD3onkN+/nf6SpjFHCpKklucUJEktQ0GS1DIUJEktQ0HTSpIfT3JFkn9oLm1ck+S1zY1OO2sf5yZZ2nz/C0nubuaHGk3y+Ve4zd9I8hMTli9Nsnhn1Sz1yxPNmjaaWWX/N707sS9q2o4A9qN3qe3hHezzIuCm8UkLf4jtXA/8u6pau1MKk14hRwqaTo6lNwXHReMNVXU7L06/TJIFSb7ezAp7a5Kfb9pfk+SG5hP/Xc0IYCTJZc3ynUk+0PS9LMkpSX6T3t3gZyf5TLPtu5o+I0n+rHndHUmWN+1nNzPI3pXk4vScAiwBPtPsf3aS65MsaV5zarOdu5L8yYSfZcrZaqUfhqGg6eRwJs0qO4VHgeOaWWHfCZzftP8acHVz497PArfTm2ZhtKoOr6rXAy8ZDVTVpcBqeg89ejcvdQZwGHBkMzPtZ5r2T1TV0c2oZTbwS1X1eWAt8O7mOQPt3FHNIaU/oTeN+hHA0Un+RbN6a7PVSq+YoaCZ5lXAJc1ka5+jN5UCwM3Ae5KcQ+9BLk8B9wE/meSCJMuAJ3dgP0uBi6pqM7xkttpjk9zU7P9t9KZH35ajgeurakOzrc8A/7xZN3m22gU7UJ80JUNB08nd9Kb73pYPAN+lNxpYQjOpWlXdQO+P7Xrgr5KcVlWPN/2uB36HHXs+xMtmq20mc/skcEoz8rgE2LuP7WzN1marlV4xQ0HTyVeBvZK0h1GSHE3vORPjDgAeqaoXgH9FM/VyMwPso1V1Cb0ZOY9K7wl0e1TVF4D/ABy1A7VcA/x2es8WHp+tdjwAHkuyL3DKhP5P0TshPtlN9Ga9ndvMDHoq8Hc7UIe0Q/xkoWmjqirJycB/aR7O8ixwP/BvJ3T7JPCFJO8AvsaLD4E/BvhQkh8ATwOnAaP
05u0f//A01bMltuZSes/UvqPZ5iVV9YkklwB3NnXdPKH/ZcBFSZ4B3jzhZ3okyUeaWgOsqaq/2YE6pB3iJamSpJaHjyRJLUNBktQyFCRJLUNBktQyFCRJLUNBktQyFCRJLUNBktT6/0JWkTzJE9KoAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Box plot of auprc grouped by the Classification column\n", "sns.boxplot(data=withClassificationStatDf,x='Classification',y='auprc')" ] }, { "cell_type": "code", "execution_count": 125, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/cellar/users/btsui/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: FutureWarning: \n", "Passing list-likes to .loc or [] with any missing label will raise\n", "KeyError in the future, you can use .reindex() as an alternative.\n", "\n", "See the documentation here:\n", "https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "# Look up each gene's classification. reindex() (rather than .loc with a list of labels)\n", "# yields NaN for genes absent from geneToStatus instead of raising KeyError on newer pandas.\n", "# NOTE(review): assumes geneToStatus has a unique, gene-keyed index - confirm where it is built.\n", "geneDf['Classification']=geneToStatus.reindex(geneDf.Gene).values" ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [], "source": [ "#top100GeneDf" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
auprcrnaseq_nrnaseq_rdrocaucvcfIndexChrPostcga_wxs_countGeneClassification
00.998211519133.8225190.984290(2, 208248388)2208248388371IDH1Oncogene
10.543616520122.0763360.945266(17, 7673803)17767380359TP53TSG
51.000000524353.8416031.000000(15, 90088606)159008860615IDH2Oncogene
300.116652481300.2194660.601301(7, 55165350)7551653506EGFROncogene
530.50477123710.7748090.500000(3, 179199690)31791996904PIK3CAOncogene
970.67140650194.8541270.794175(17, 31350209)17313502093NF1TSG
980.530668518380.9942750.738304(5, 68295269)5682952693PIK3R1TSG
\n", "
" ], "text/plain": [ " auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n", "0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n", "1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n", "5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n", "30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n", "53 0.504771 237 10.774809 0.500000 (3, 179199690) 3 179199690 \n", "97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n", "98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n", "\n", " tcga_wxs_count Gene Classification \n", "0 371 IDH1 Oncogene \n", "1 59 TP53 TSG \n", "5 15 IDH2 Oncogene \n", "30 6 EGFR Oncogene \n", "53 4 PIK3CA Oncogene \n", "97 3 NF1 TSG \n", "98 3 PIK3R1 TSG " ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show only the rows of geneDf that have no missing values in any column\n", "geneDf.dropna()" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADdVJREFUeJzt3X+MZfVZx/H3Y7eNhKlA3TJZF2RqQkk3HW27k4bExN6RtCIkpbWpgWhllTrGtGjMarLqHyUa4/4hNjE2MdQSsEmZ1EYFAUVcGTca2jgrlF1KEMS17rKBUmDTQaIuefxjLsm67Ow995577p15fL+Sm7nnx5zv8+yd+eTM995zNjITSdLW913TLkCSNB4GuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhHbJjnY9u3bc25ubpJDdu6VV17h/PPPn3YZY2VPW0fFvir2BO36OnTo0AuZ+fZB+0000Ofm5lhdXZ3kkJ1bWVmh1+tNu4yxsqeto2JfFXuCdn1FxL832c8pF0kqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqYqJXim5Vc/vu23Db3vlT7DnH9jaO7r+2k+NKqskzdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIGBnpEXBoRD0XEExHxeET8cn/92yLiwYh4qv/1ou7LlSRtpMkZ+ilgb2a+C7gS+FRE7AL2AQcy83LgQH9ZkjQlAwM9M09k5j/3n38HeALYCVwH3Nnf7U7gI10VKUkabKg59IiYA94LfA2YzcwTsB76wMXjLk6S1FxkZrMdI2aAvwd+JzP/LCJezswLT9v+Uma+YR49IpaAJYDZ2dndy8vL46l8gg4fP7nhttnz4LlXuxl3fucF3Rx4gLW1NWZmZqYydlcq9gQ1+6rYE7Tra3Fx8VBmLgzar1GgR8SbgXuBBzLz9/vrngR6mXkiInYAK5l5xbmOs7CwkKurq40a2Ezm9t234ba986e49fC2TsY9uv/aTo47yMrKCr1ebypjd6ViT1Czr4o9Qbu+IqJRoDf5lEsAXwCeeD3M++4Bbuw/vxG4e5RCJUnj0eTU8oeBTwCHI+LR/rrfAPYDX46Im4BvAh/vpkRJUhMDAz0z/wGIDTZfNd5yJEmj8kpRSSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgx0SSrCQJekIgYGekTcHhHPR8SR09bdEhHHI+LR/uOabsuUJA3S5Az9DuDqs6z/bGa+p/+4f7xlSZKGNTDQM/Mg8OIEapEktdBmDv3TEfFYf0rmorFVJEkaSWTm4J0i5oB7M/Pd/eVZ4AUggd8GdmTmz23wvUvAEsDs7Ozu5eXlsRQ+SYePn9xw2+x58Nyr3Yw7v/OCbg48wNraGjMzM1MZuysVe4KafVXsCdr1tbi4eCgzFwbtN1KgN912poWFhVxdXR043mYzt+++DbftnT/FrYe3dTLu0f3XdnLcQVZWVuj1elMZuysVe4KafVXsCdr1FRGNAn2kKZeI2HHa4keBIxvtK0majIGnlhFxF9ADtkfEMeAzQC8i3sP6lMtR4Bc6rFGS1MDAQM/MG86y+gsd1CJJasErRSWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCAN
dkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkoow0CWpCANdkooYGOgRcXtEPB8RR05b97aIeDAinup/vajbMiVJgzQ5Q78DuPqMdfuAA5l5OXCgvyxJmqKBgZ6ZB4EXz1h9HXBn//mdwEfGXJckaUijzqHPZuYJgP7Xi8dXkiRpFJGZg3eKmAPuzcx395dfzswLT9v+UmaedR49IpaAJYDZ2dndy8vLYyh7sg4fP7nhttnz4LlXuxl3fucF3Rx4gLW1NWZmZqYydlcq9gQ1+6rYE7Tra3Fx8VBmLgzab9tIR4fnImJHZp6IiB3A8xvtmJm3AbcBLCwsZK/XG3HI6dmz774Nt+2dP8Wth0f9Zzy3oz/V6+S4g6ysrLAVX6dzqdgT1OyrYk8wmb5GnXK5B7ix//xG4O7xlCNJGlWTjy3eBTwMXBERxyLiJmA/8MGIeAr4YH9ZkjRFA+cKMvOGDTZdNeZaJEkteKWoJBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEQa6JBVhoEtSEdvafHNEHAW+A7wGnMrMhXEUJUkaXqtA71vMzBfGcBxJUgtOuUhSEW0DPYG/iYhDEbE0joIkSaOJzBz9myO+LzOfjYiLgQeBmzPz4Bn7LAFLALOzs7uXl5dHGuvw8ZMj19ml2fPguVe7Ofb8zgu6OfAAa2trzMzMTGXsrlTsCWr2VbEnaNfX4uLioSbvUbYK9P9zoIhbgLXM/L2N9llYWMjV1dWRjj+3774RK+vW3vlT3Hp4HG9FvNHR/dd2ctxBVlZW6PV6Uxm7KxV7gpp9VewJ2vUVEY0CfeQpl4g4PyLe+vpz4EPAkVGPJ0lqp82p5Szw5xHx+nG+lJl/PZaqJElDGznQM/MZ4IfGWIskqQU/tihJRRjoklSEgS5JRRjoklRENx+glqRNaJrXs9xx9fmdj+EZuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhFeWKQ3OHz8JHumdAHGtP5TD6kCz9AlqQgDXZKKMNAlqQgDXZKKMNAlqQgDXZKKMNAlqQgDXZKK8MIiacra/C86e+dPjXwRmBdx1eMZuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhEGuiQVYaBLUhFeWLSJtbngpI2981MZFuiu5zYX4EhbhWfoklSEgS5JRRjoklSEgS5JRbQK9Ii4OiKejIinI2LfuIqSJA1v5ECPiDcBnwN+HNgF3BARu8ZVmCRpOG3O0N8PPJ2Zz2TmfwPLwHXjKUuSNKw2gb4T+I/Tlo/110mSpiAyc7RvjPg48GOZ+cn+8ieA92fmzWfstwQs9RevAJ4cvdxNaTvwwrSLGDN72joq9lWxJ2jX12WZ+fZBO7W5UvQYcOlpy5cAz565U2beBtzWYpxNLSJWM3Nh2nWMkz1tHRX7qtgTTKavNlMu/wRcHhHviIi3ANcD94ynLEnSsEY+Q8/MUxHxaeAB4E3A7Zn5+NgqkyQNpdXNuTLzfuD+MdWyVVWcTrKnraNiXxV7ggn0NfKbopKkzcVL/yWpCAO9gUG3OIiIPRHxrYh4tP/45DTqHFaTWzdExE9GxDci4vGI+NKkaxxWg9fqs6e9Tv8SES9Po85hNejr+yPioYh4JCIei4hrplHnMBr0dFlEHOj3sxIRl0yjzmFExO0R8XxEHNlge0TEH/R7fiwi3jfWAjLTxzkerL/h+6/ADwBvAb4O7Dpjnz3AH0671g76uhx4BLiov3zxtOtu29MZ+9/M+pv5U699DK/
VbcAv9p/vAo5Ou+4x9PSnwI395z8KfHHadTfo60eA9wFHNth+DfBXQABXAl8b5/ieoQ9W9RYHTfr6eeBzmfkSQGY+P+EahzXsa3UDcNdEKmunSV8JfE//+QWc5ZqQTaZJT7uAA/3nD51l+6aTmQeBF8+xy3XAn+S6rwIXRsSOcY1voA/W9BYHH+v/CfWViLj0LNs3myZ9vRN4Z0T8Y0R8NSKunlh1o2l8O4qIuAx4B/B3E6irrSZ93QL8dEQcY/2TZzezuTXp6evAx/rPPwq8NSK+dwK1danTW6YY6IPFWdad+dGgvwTmMvMHgb8F7uy8qvaa9LWN9WmXHutns38cERd2XFcbTXp63fXAVzLztQ7rGZcmfd0A3JGZl7D+Z/0XI2Iz/3436elXgQ9ExCPAB4DjwKmuC+vYMD+jQ9vML/hmMfAWB5n57cz8r/7i54HdE6qtjSa3bjgG3J2Z/5OZ/8b6fXgun1B9o2h0O4q+69ka0y3QrK+bgC8DZObDwHezfu+QzarJ79WzmfkTmfle4Df7605OrsRODPMzOjQDfbCBtzg4Yw7sw8ATE6xvVE1u3fAXwCJARGxnfQrmmYlWOZxGt6OIiCuAi4CHJ1zfqJr09U3gKoCIeBfrgf6tiVY5nCa/V9tP+yvj14HbJ1xjF+4Bfqb/aZcrgZOZeWJcB291pej/B7nBLQ4i4reA1cy8B/iliPgw638Ovsj6p142tYZ9PQB8KCK+AbwG/Fpmfnt6VZ9bw55gfXpiOfsfO9jsGva1F/h8RPwK63/C79nM/TXsqQf8bkQkcBD41NQKbigi7mK97u399zM+A7wZIDP/iPX3N64Bngb+E/jZsY6/iV9zSdIQnHKRpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkqwkCXpCIMdEkq4n8BUTSqpFRBAZ8AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "# Histogram of the rocauc column of geneDf\n", "geneDf['rocauc'].hist()" ] }, { "cell_type": "code", "execution_count": 109, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
auprcrnaseq_nrnaseq_rdrocaucvcfIndexChrPostcga_wxs_countGene
30.99154645738.3129770.507767(14, 32092134)143209213414ARHGAP5
70.73841700.0019310.500000(8, 142877758)814287775811CYP11B1
80.989537915.2629480.503589(X, 24789042)X2478904211POLA1
110.68492528616.2423660.613307(6, 29944050)6299440509HLA
130.9982265241291.3740460.694932(X, 24788994)X247889949POLA1
140.99412743136.3205370.621622(12, 6018369)1260183699VWF
180.92429550279.0515270.615436(12, 6018901)1260189017VWF
190.967688524950.9332060.671795(6, 31271836)6312718367HLA
200.5249735021370.7922330.456735(6, 31356729)6313567297MIR6891
210.96647510.3927200.500000(7, 117548682)71175486827CFTR
220.8435965222902.0687020.470996(6, 29943406)6299434067HLA
230.6240324802319.4034750.517632(6, 29943422)6299434227HLA
250.97691530.3199150.498495(7, 142750675)71427506757PRSS1
260.8815032129.7900760.500000(7, 152238825)71522388256KMT2C
280.50168947284.0257940.500000(9, 128257486)91282574866GOLGA2
290.96214240.2159530.501053(7, 142750600)71427506006PRSS1
300.116652481300.2194660.601301(7, 55165350)7551653506EGFR
320.96123930.4923660.479700(4, 144120554)41441205546GYPA
330.86704831518.9805830.640445(6, 29944118)6299441186HLA
360.99173491.3154880.507143(19, 14766987)19147669875ADGRE2
380.95652200.0645160.500000(7, 127611678)71276116785PAX4
390.98114100.0136990.504950(1, 235775088)12357750885LYST
400.92248312.2920890.501445(1, 173828313)11738283135DARS2
410.991328151.3317070.523174(2, 166281810)21662818105LOC101929680
420.94444482.3702130.500000(6, 32664778)6326647785HLA
430.072989401747.1783570.569311(6, 31270232)6312702325HLA
440.890402717.5856030.508440(12, 52897420)12528974205KRT8
460.4683475172905.9483750.499899(6, 29942845)6299428455HLA
500.56146450073.9713740.545052(12, 6018910)1260189105VWF
510.996116524189.5229010.500495(5, 236441)52364414SDHA
..............................
590.77508931517.1832060.672237(6, 29944059)6299440594HLA
630.556916283553.9027480.533268(6, 31270214)6312702144HLA
640.0258494541296.3773950.552955(6, 29944124)6299441244HLA
660.0286014841758.9492900.509784(6, 31356377)6313563774MIR6891
680.3806955122479.9599240.497330(6, 29944132)6299441323HLA
690.0699745162769.6526720.599747(6, 29944135)6299441353HLA
700.002137162.0423450.495708(1, 237591774)12375917743RYR2
710.8084345243416.6507630.442027(6, 29944376)6299443763HLA
720.2326685243618.6469470.603043(6, 29944151)6299441513HLA
730.741291506315.1984730.621487(6, 32664883)6326648833HLA
750.86320339531.9369020.488386(10, 4999206)1049992063AKR1C2
770.380747480191.6826920.596527(6, 32661384)6326613843HLA
780.778990272.5810810.645061(6, 32661333)6326613333HLA
790.00740701.3076920.481203(12, 57099758)12570997583STAT6
810.9939345231127.1335880.582806(16, 2106849)1621068493MIR6511B1
840.0619885242200.0858780.573877(6, 29942916)6299429163HLA
850.756602292.6145040.669605(3, 75630855)3756308553LOC107986102
860.224100131.7284890.588009(3, 75630794)3756307943LOC107986102
870.5495321037.1495100.523889(3, 49686483)3496864833MST1
880.6836125191443.9732820.499179(6, 29943463)6299434633HLA
890.758810353.0599610.520303(6, 29943667)6299436673HLA
930.49332851282.1202290.692187(17, 7673704)1776737043TP53
940.403970553.7862750.490953(17, 21416556)17214165563KCNJ12
950.666459523986.1145040.500538(8, 100709671)81007096713PABPC1
960.95112500.0472440.509217(3, 183959847)31839598473ABCC5
990.87078149523.5725190.462571(22, 24627926)22246279263GGT1
1000.75490200.0019570.500000(20, 8788776)2087887763PLCB1
1010.28904720513.2736220.551120(6, 31271875)6312718753HLA
1020.318593524977.5515270.504434(6, 31271839)6312718393HLA
1030.9532585222469.6111110.582500(6, 31356399)6313563993MIR6891
\n", "

67 rows × 9 columns

\n", "
" ], "text/plain": [ " auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n", "3 0.991546 457 38.312977 0.507767 (14, 32092134) 14 32092134 \n", "7 0.738417 0 0.001931 0.500000 (8, 142877758) 8 142877758 \n", "8 0.989537 91 5.262948 0.503589 (X, 24789042) X 24789042 \n", "11 0.684925 286 16.242366 0.613307 (6, 29944050) 6 29944050 \n", "13 0.998226 524 1291.374046 0.694932 (X, 24788994) X 24788994 \n", "14 0.994127 431 36.320537 0.621622 (12, 6018369) 12 6018369 \n", "18 0.924295 502 79.051527 0.615436 (12, 6018901) 12 6018901 \n", "19 0.967688 524 950.933206 0.671795 (6, 31271836) 6 31271836 \n", "20 0.524973 502 1370.792233 0.456735 (6, 31356729) 6 31356729 \n", "21 0.966475 1 0.392720 0.500000 (7, 117548682) 7 117548682 \n", "22 0.843596 522 2902.068702 0.470996 (6, 29943406) 6 29943406 \n", "23 0.624032 480 2319.403475 0.517632 (6, 29943422) 6 29943422 \n", "25 0.976915 3 0.319915 0.498495 (7, 142750675) 7 142750675 \n", "26 0.881503 212 9.790076 0.500000 (7, 152238825) 7 152238825 \n", "28 0.501689 472 84.025794 0.500000 (9, 128257486) 9 128257486 \n", "29 0.962142 4 0.215953 0.501053 (7, 142750600) 7 142750600 \n", "30 0.116652 481 300.219466 0.601301 (7, 55165350) 7 55165350 \n", "32 0.961239 3 0.492366 0.479700 (4, 144120554) 4 144120554 \n", "33 0.867048 315 18.980583 0.640445 (6, 29944118) 6 29944118 \n", "36 0.991734 9 1.315488 0.507143 (19, 14766987) 19 14766987 \n", "38 0.956522 0 0.064516 0.500000 (7, 127611678) 7 127611678 \n", "39 0.981141 0 0.013699 0.504950 (1, 235775088) 1 235775088 \n", "40 0.922483 1 2.292089 0.501445 (1, 173828313) 1 173828313 \n", "41 0.991328 15 1.331707 0.523174 (2, 166281810) 2 166281810 \n", "42 0.944444 8 2.370213 0.500000 (6, 32664778) 6 32664778 \n", "43 0.072989 401 747.178357 0.569311 (6, 31270232) 6 31270232 \n", "44 0.890402 71 7.585603 0.508440 (12, 52897420) 12 52897420 \n", "46 0.468347 517 2905.948375 0.499899 (6, 29942845) 6 29942845 \n", "50 0.561464 500 73.971374 0.545052 (12, 6018910) 12 6018910 \n", "51 
0.996116 524 189.522901 0.500495 (5, 236441) 5 236441 \n", ".. ... ... ... ... ... .. ... \n", "59 0.775089 315 17.183206 0.672237 (6, 29944059) 6 29944059 \n", "63 0.556916 283 553.902748 0.533268 (6, 31270214) 6 31270214 \n", "64 0.025849 454 1296.377395 0.552955 (6, 29944124) 6 29944124 \n", "66 0.028601 484 1758.949290 0.509784 (6, 31356377) 6 31356377 \n", "68 0.380695 512 2479.959924 0.497330 (6, 29944132) 6 29944132 \n", "69 0.069974 516 2769.652672 0.599747 (6, 29944135) 6 29944135 \n", "70 0.002137 16 2.042345 0.495708 (1, 237591774) 1 237591774 \n", "71 0.808434 524 3416.650763 0.442027 (6, 29944376) 6 29944376 \n", "72 0.232668 524 3618.646947 0.603043 (6, 29944151) 6 29944151 \n", "73 0.741291 506 315.198473 0.621487 (6, 32664883) 6 32664883 \n", "75 0.863203 395 31.936902 0.488386 (10, 4999206) 10 4999206 \n", "77 0.380747 480 191.682692 0.596527 (6, 32661384) 6 32661384 \n", "78 0.778990 27 2.581081 0.645061 (6, 32661333) 6 32661333 \n", "79 0.007407 0 1.307692 0.481203 (12, 57099758) 12 57099758 \n", "81 0.993934 523 1127.133588 0.582806 (16, 2106849) 16 2106849 \n", "84 0.061988 524 2200.085878 0.573877 (6, 29942916) 6 29942916 \n", "85 0.756602 29 2.614504 0.669605 (3, 75630855) 3 75630855 \n", "86 0.224100 13 1.728489 0.588009 (3, 75630794) 3 75630794 \n", "87 0.549532 103 7.149510 0.523889 (3, 49686483) 3 49686483 \n", "88 0.683612 519 1443.973282 0.499179 (6, 29943463) 6 29943463 \n", "89 0.758810 35 3.059961 0.520303 (6, 29943667) 6 29943667 \n", "93 0.493328 512 82.120229 0.692187 (17, 7673704) 17 7673704 \n", "94 0.403970 55 3.786275 0.490953 (17, 21416556) 17 21416556 \n", "95 0.666459 523 986.114504 0.500538 (8, 100709671) 8 100709671 \n", "96 0.951125 0 0.047244 0.509217 (3, 183959847) 3 183959847 \n", "99 0.870781 495 23.572519 0.462571 (22, 24627926) 22 24627926 \n", "100 0.754902 0 0.001957 0.500000 (20, 8788776) 20 8788776 \n", "101 0.289047 205 13.273622 0.551120 (6, 31271875) 6 31271875 \n", "102 0.318593 524 977.551527 0.504434 (6, 
31271839) 6 31271839 \n", "103 0.953258 522 2469.611111 0.582500 (6, 31356399) 6 31356399 \n", "\n", " tcga_wxs_count Gene \n", "3 14 ARHGAP5 \n", "7 11 CYP11B1 \n", "8 11 POLA1 \n", "11 9 HLA \n", "13 9 POLA1 \n", "14 9 VWF \n", "18 7 VWF \n", "19 7 HLA \n", "20 7 MIR6891 \n", "21 7 CFTR \n", "22 7 HLA \n", "23 7 HLA \n", "25 7 PRSS1 \n", "26 6 KMT2C \n", "28 6 GOLGA2 \n", "29 6 PRSS1 \n", "30 6 EGFR \n", "32 6 GYPA \n", "33 6 HLA \n", "36 5 ADGRE2 \n", "38 5 PAX4 \n", "39 5 LYST \n", "40 5 DARS2 \n", "41 5 LOC101929680 \n", "42 5 HLA \n", "43 5 HLA \n", "44 5 KRT8 \n", "46 5 HLA \n", "50 5 VWF \n", "51 4 SDHA \n", ".. ... ... \n", "59 4 HLA \n", "63 4 HLA \n", "64 4 HLA \n", "66 4 MIR6891 \n", "68 3 HLA \n", "69 3 HLA \n", "70 3 RYR2 \n", "71 3 HLA \n", "72 3 HLA \n", "73 3 HLA \n", "75 3 AKR1C2 \n", "77 3 HLA \n", "78 3 HLA \n", "79 3 STAT6 \n", "81 3 MIR6511B1 \n", "84 3 HLA \n", "85 3 LOC107986102 \n", "86 3 LOC107986102 \n", "87 3 MST1 \n", "88 3 HLA \n", "89 3 HLA \n", "93 3 TP53 \n", "94 3 KCNJ12 \n", "95 3 PABPC1 \n", "96 3 ABCC5 \n", "99 3 GGT1 \n", "100 3 PLCB1 \n", "101 3 HLA \n", "102 3 HLA \n", "103 3 MIR6891 \n", "\n", "[67 rows x 9 columns]" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows of top100GeneDf whose rocauc is strictly below 0.7\n", "top100GeneDf.loc[top100GeneDf['rocauc'] < 0.7]" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
auprcrnaseq_nrnaseq_rdrocaucvcfIndexChrPostcga_wxs_countGene
00.998211519133.8225190.984290(2, 208248388)2208248388371IDH1
10.543616520122.0763360.945266(17, 7673803)17767380359TP53
20.865149519133.3759540.896410(2, 208248389)220824838938IDH1
51.000000524353.8416031.000000(15, 90088606)159008860615IDH2
60.954091520121.3728490.948889(17, 7673802)17767380215TP53
90.999940513210.7767180.996541(1, 109690516)110969051610GSTM1
100.97515449659.1354960.999370(17, 7674872)17767487210TP53
150.895074520124.6367110.929366(17, 7674220)1776742208TP53
160.85000051091.6908400.998394(17, 7675076)1776750768TP53
170.779858509107.2053740.765568(17, 7675088)1776750888TP53
241.000000520123.9560231.000000(17, 7674221)1776742217TP53
270.777019518121.8435110.875674(17, 7673776)1776737766TP53
310.99949982.0927420.781609(12, 2685853)1226858536CACNA1C
340.831944519120.8053440.871324(17, 7674230)1776742305TP53
350.79572551399.9675570.869141(17, 7674945)1776749455TP53
370.718190464182.2767180.845446(7, 55154129)7551541295EGFR
450.5597695242793.3110690.797976(6, 29942825)6299428255HLA
470.92291336821.5820610.798079(6, 29944102)6299441025HLA
480.80233337121.6087790.777366(6, 29944103)6299441035HLA
490.9422625244361.5438930.960710(6, 29944168)6299441685HLA
600.98422231516.5353730.803603(6, 29944067)6299440674HLA
620.916660399759.4716600.916426(6, 31270233)6312702334HLA
650.9161025243459.5438930.740310(6, 29944144)6299441444HLA
671.000000516115.8530531.000000(17, 7676044)1776760444TP53
740.026316436162.3067960.784810(6, 32664926)6326649263HLA
760.815908493215.2118320.928361(6, 32661393)6326613933HLA
820.9955225171391.3957530.989221(6, 29942795)6299427953HLA
830.9650235102437.3996100.856497(6, 29942858)6299428583HLA
900.85811252366.9847330.899413(3, 179234284)31792342843PIK3CA
910.84474251388.1431300.874750(17, 7674888)1776748883TP53
921.000000517117.5267181.000000(17, 7674256)1776742563TP53
970.67140650194.8541270.794175(17, 31350209)17313502093NF1
980.530668518380.9942750.738304(5, 68295269)5682952693PIK3R1
\n", "
" ], "text/plain": [ " auprc rnaseq_n rnaseq_rd rocauc vcfIndex Chr Pos \\\n", "0 0.998211 519 133.822519 0.984290 (2, 208248388) 2 208248388 \n", "1 0.543616 520 122.076336 0.945266 (17, 7673803) 17 7673803 \n", "2 0.865149 519 133.375954 0.896410 (2, 208248389) 2 208248389 \n", "5 1.000000 524 353.841603 1.000000 (15, 90088606) 15 90088606 \n", "6 0.954091 520 121.372849 0.948889 (17, 7673802) 17 7673802 \n", "9 0.999940 513 210.776718 0.996541 (1, 109690516) 1 109690516 \n", "10 0.975154 496 59.135496 0.999370 (17, 7674872) 17 7674872 \n", "15 0.895074 520 124.636711 0.929366 (17, 7674220) 17 7674220 \n", "16 0.850000 510 91.690840 0.998394 (17, 7675076) 17 7675076 \n", "17 0.779858 509 107.205374 0.765568 (17, 7675088) 17 7675088 \n", "24 1.000000 520 123.956023 1.000000 (17, 7674221) 17 7674221 \n", "27 0.777019 518 121.843511 0.875674 (17, 7673776) 17 7673776 \n", "31 0.999499 8 2.092742 0.781609 (12, 2685853) 12 2685853 \n", "34 0.831944 519 120.805344 0.871324 (17, 7674230) 17 7674230 \n", "35 0.795725 513 99.967557 0.869141 (17, 7674945) 17 7674945 \n", "37 0.718190 464 182.276718 0.845446 (7, 55154129) 7 55154129 \n", "45 0.559769 524 2793.311069 0.797976 (6, 29942825) 6 29942825 \n", "47 0.922913 368 21.582061 0.798079 (6, 29944102) 6 29944102 \n", "48 0.802333 371 21.608779 0.777366 (6, 29944103) 6 29944103 \n", "49 0.942262 524 4361.543893 0.960710 (6, 29944168) 6 29944168 \n", "60 0.984222 315 16.535373 0.803603 (6, 29944067) 6 29944067 \n", "62 0.916660 399 759.471660 0.916426 (6, 31270233) 6 31270233 \n", "65 0.916102 524 3459.543893 0.740310 (6, 29944144) 6 29944144 \n", "67 1.000000 516 115.853053 1.000000 (17, 7676044) 17 7676044 \n", "74 0.026316 436 162.306796 0.784810 (6, 32664926) 6 32664926 \n", "76 0.815908 493 215.211832 0.928361 (6, 32661393) 6 32661393 \n", "82 0.995522 517 1391.395753 0.989221 (6, 29942795) 6 29942795 \n", "83 0.965023 510 2437.399610 0.856497 (6, 29942858) 6 29942858 \n", "90 0.858112 523 66.984733 0.899413 (3, 
179234284) 3 179234284 \n", "91 0.844742 513 88.143130 0.874750 (17, 7674888) 17 7674888 \n", "92 1.000000 517 117.526718 1.000000 (17, 7674256) 17 7674256 \n", "97 0.671406 501 94.854127 0.794175 (17, 31350209) 17 31350209 \n", "98 0.530668 518 380.994275 0.738304 (5, 68295269) 5 68295269 \n", "\n", " tcga_wxs_count Gene \n", "0 371 IDH1 \n", "1 59 TP53 \n", "2 38 IDH1 \n", "5 15 IDH2 \n", "6 15 TP53 \n", "9 10 GSTM1 \n", "10 10 TP53 \n", "15 8 TP53 \n", "16 8 TP53 \n", "17 8 TP53 \n", "24 7 TP53 \n", "27 6 TP53 \n", "31 6 CACNA1C \n", "34 5 TP53 \n", "35 5 TP53 \n", "37 5 EGFR \n", "45 5 HLA \n", "47 5 HLA \n", "48 5 HLA \n", "49 5 HLA \n", "60 4 HLA \n", "62 4 HLA \n", "65 4 HLA \n", "67 4 TP53 \n", "74 3 HLA \n", "76 3 HLA \n", "82 3 HLA \n", "83 3 HLA \n", "90 3 PIK3CA \n", "91 3 TP53 \n", "92 3 TP53 \n", "97 3 NF1 \n", "98 3 PIK3R1 " ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows of top100GeneDf whose rocauc is strictly above 0.7\n", "top100GeneDf.loc[top100GeneDf['rocauc'] > 0.7]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }