{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "fa924e02", "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "from math import floor\n", "import matplotlib\n", "from matplotlib import pyplot, patches\n", "pyplot.style.use(\"ggplot\")" ] }, { "cell_type": "markdown", "id": "41eca98a", "metadata": {}, "source": [ "# Plot Figure 2 (FDR curves for $p$-mutations) for the paper" ] }, { "cell_type": "markdown", "id": "e0c4fbc4", "metadata": {}, "source": [ "## Load computed FDR estimates and number-of-mutations-per-megabase information" ] }, { "cell_type": "code", "execution_count": 2, "id": "0dcbeb3d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "All 11 decoy context(s) we found:\n", " - CP2\n", " - CP2Nonsense\n", " - CP2Nonsyn\n", " - CP2Tv\n", " - CP2TvNonsense\n", " - Full\n", " - Nonsense\n", " - Nonsyn\n", " - Tv\n", " - TvNonsense\n", " - TvNonsyn\n" ] }, { "data": { "text/html": [ "
\n", " | p15 | \n", "p16 | \n", "p17 | \n", "p18 | \n", "p19 | \n", "p20 | \n", "p21 | \n", "p22 | \n", "p23 | \n", "p24 | \n", "... | \n", "p490 | \n", "p491 | \n", "p492 | \n", "p493 | \n", "p494 | \n", "p495 | \n", "p496 | \n", "p497 | \n", "p498 | \n", "p499 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Contig | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
edge_1 | \n", "26.421205 | \n", "21.959295 | \n", "17.710583 | \n", "15.517699 | \n", "13.263902 | \n", "11.497412 | \n", "9.791835 | \n", "8.878134 | \n", "7.61418 | \n", "6.761392 | \n", "... | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
edge_10 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
edge_100 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
edge_1000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
edge_10000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
5 rows × 485 columns
\n", "\n", " | p15 | \n", "p16 | \n", "p17 | \n", "p18 | \n", "p19 | \n", "p20 | \n", "p21 | \n", "p22 | \n", "p23 | \n", "p24 | \n", "... | \n", "p490 | \n", "p491 | \n", "p492 | \n", "p493 | \n", "p494 | \n", "p495 | \n", "p496 | \n", "p497 | \n", "p498 | \n", "p499 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Contig | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
edge_1 | \n", "16630.860609 | \n", "16630.860609 | \n", "16630.860609 | \n", "16630.860609 | \n", "16630.860609 | \n", "16630.860609 | \n", "16630.860609 | \n", "16630.860609 | \n", "16630.860609 | \n", "16630.860609 | \n", "... | \n", "25.274864 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
edge_10 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
edge_100 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
edge_1000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
edge_10000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "0.0 | \n", "
5 rows × 485 columns
\n", "\n", " | AverageCoverage | \n", "Length | \n", "DivIdx(p=25,minSuffCov=2000.0) | \n", "DivIdx(p=50,minSuffCov=1000.0) | \n", "DivIdx(p=100,minSuffCov=500.0) | \n", "DivIdx(p=200,minSuffCov=250.0) | \n", "DivIdx(p=500,minSuffCov=100.0) | \n", "DivIdx(p=1000,minSuffCov=50.0) | \n", "DivIdx(p=2500,minSuffCov=20.0) | \n", "DivIdx(p=5000,minSuffCov=10.0) | \n", "
---|---|---|---|---|---|---|---|---|---|---|
Contig | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
edge_3030 | \n", "1035.740707 | \n", "1915304 | \n", "NaN | \n", "0.041088 | \n", "0.033952 | \n", "0.031322 | \n", "0.025060 | \n", "0.017557 | \n", "4.156003e-04 | \n", "1.566332e-06 | \n", "
edge_3402 | \n", "877.282137 | \n", "3713351 | \n", "NaN | \n", "0.024924 | \n", "0.018800 | \n", "0.012130 | \n", "0.002100 | \n", "0.000195 | \n", "1.008768e-04 | \n", "2.706565e-07 | \n", "
edge_7356 | \n", "742.905564 | \n", "1602179 | \n", "NaN | \n", "0.024833 | \n", "0.021453 | \n", "0.017328 | \n", "0.006580 | \n", "0.001584 | \n", "2.563919e-04 | \n", "0.000000e+00 | \n", "
edge_7354 | \n", "1119.104000 | \n", "1036318 | \n", "NaN | \n", "0.023789 | \n", "0.018875 | \n", "0.015372 | \n", "0.009458 | \n", "0.005996 | \n", "1.543928e-05 | \n", "0.000000e+00 | \n", "
edge_7349 | \n", "1325.510426 | \n", "1175877 | \n", "NaN | \n", "0.012973 | \n", "0.010487 | \n", "0.008060 | \n", "0.003285 | \n", "0.001409 | \n", "3.500673e-04 | \n", "1.713526e-06 | \n", "
edge_1671 | \n", "1415.072755 | \n", "2153394 | \n", "NaN | \n", "0.011340 | \n", "0.010501 | \n", "0.008338 | \n", "0.003400 | \n", "0.000240 | \n", "4.679987e-07 | \n", "0.000000e+00 | \n", "
edge_23917 | \n", "1987.798829 | \n", "1020213 | \n", "0.013810 | \n", "0.009018 | \n", "0.003313 | \n", "0.000775 | \n", "0.000467 | \n", "0.000306 | \n", "0.000000e+00 | \n", "0.000000e+00 | \n", "
edge_11863 | \n", "1057.411793 | \n", "2597011 | \n", "NaN | \n", "0.001933 | \n", "0.000818 | \n", "0.000670 | \n", "0.000345 | \n", "0.000208 | \n", "1.136314e-04 | \n", "3.898408e-07 | \n", "
edge_4773 | \n", "1176.249727 | \n", "1371455 | \n", "NaN | \n", "0.001268 | \n", "0.000947 | \n", "0.000865 | \n", "0.000841 | \n", "0.000556 | \n", "0.000000e+00 | \n", "0.000000e+00 | \n", "
edge_1371 | \n", "2215.936973 | \n", "1634973 | \n", "0.002513 | \n", "0.001173 | \n", "0.000991 | \n", "0.000909 | \n", "0.000651 | \n", "0.000081 | \n", "5.749330e-05 | \n", "0.000000e+00 | \n", "