{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Identify top biological pathways linked to blood pressure genes by the _GiGpBP_ metapath\n", "\n", "Proposed in https://github.com/greenelab/hetmech/pull/77." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import collections\n", "\n", "import pandas\n", "import hetio.readwrite\n", "import numpy\n", "\n", "from hetmech.degree_weight import dwpc" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "repo_url = 'https://github.com/dhimmel/hetionet'\n", "commit = '6d26d15e9055b33b4fd97a180fa288e4f2060b96'\n", "names = ['hetionet-v1.0'] + [f'hetionet-v1.0-perm-{i + 1}' for i in range(5)] \n", "paths = ['hetnet/json/hetionet-v1.0.json.bz2'] + [\n", " f'hetnet/permuted/json/{name}.json.bz2' for name in names[1:]\n", "]\n", "hetnets = collections.OrderedDict()\n", "for name, path in zip(names, paths):\n", " url = f'{repo_url}/raw/{commit}/{path}'\n", " hetnets[name] = hetio.readwrite.read_graph(url)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['hetionet-v1.0',\n", " 'hetionet-v1.0-perm-1',\n", " 'hetionet-v1.0-perm-2',\n", " 'hetionet-v1.0-perm-3',\n", " 'hetionet-v1.0-perm-4',\n", " 'hetionet-v1.0-perm-5']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(hetnets)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Computing DWPC matrix for the GiGpBP metapath in hetionet-v1.0 took 449.6 seconds\n", "Computing DWPC matrix for the GiGpBP metapath in hetionet-v1.0-perm-1 took 180.7 seconds\n", "Computing DWPC matrix for the GiGpBP metapath in hetionet-v1.0-perm-2 took 178.7 seconds\n", "Computing DWPC matrix for the GiGpBP metapath in hetionet-v1.0-perm-3 took 178.7 seconds\n", "Computing DWPC matrix for the GiGpBP metapath 
in hetionet-v1.0-perm-4 took 174.0 seconds\n", "Computing DWPC matrix for the GiGpBP metapath in hetionet-v1.0-perm-5 took 176.4 seconds\n" ] } ], "source": [ "DWPCs = collections.OrderedDict()\n", "for name, graph in hetnets.items():\n", " metapath = graph.metagraph.metapath_from_abbrev('GiGpBP')\n", " rows, cols, dwpc_matrix, seconds = dwpc(graph, metapath, damping=0.4)\n", " DWPCs[name] = dwpc_matrix\n", " print(f'Computing DWPC matrix for the {metapath} metapath in {name} took {seconds:.1f} seconds')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note that generating the DWPC matrix on the unpermuted network took much longer (about 450 seconds) than on any of the permuted networks (about 174-181 seconds each). We may want to investigate the cause of this differential runtime, as it may provide valuable insight." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Gene–interacts–Gene–participates–Biological Process'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metapath.get_unicode_str()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Read diffex" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrez_gene_idBP_sixCohort_meta_TEBP_sixCohort_meta_pweightweight_downweight_up
013180.0022821.000000e-150.0342240.00.034224
1916630.0025781.000000e-150.0386710.00.038671
\n", "
" ], "text/plain": [ " entrez_gene_id BP_sixCohort_meta_TE BP_sixCohort_meta_p weight \\\n", "0 1318 0.002282 1.000000e-15 0.034224 \n", "1 91663 0.002578 1.000000e-15 0.038671 \n", "\n", " weight_down weight_up \n", "0 0.0 0.034224 \n", "1 0.0 0.038671 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Differentially expressed blood pressure genes from https://doi.org/10.1371/journal.pgen.1005035\n", "url = 'https://doi.org/10.1371/journal.pgen.1005035.s006'\n", "bp_df = (\n", " pandas.read_excel(url, skiprows=[0, 2])\n", " .rename(columns={\n", " 'EntrezGeneID_FHS': 'entrez_gene_id',\n", " })\n", " .dropna(subset=['entrez_gene_id'])\n", " .drop_duplicates(subset=['entrez_gene_id'])\n", " .query(\"BP_sixCohort_meta_p < 0.001\")\n", " [['entrez_gene_id', 'BP_sixCohort_meta_TE', 'BP_sixCohort_meta_p']]\n", ")\n", "\n", "# Entrez Genes should be ints\n", "bp_df.entrez_gene_id = bp_df.entrez_gene_id.astype(int)\n", "\n", "# Replace p-values that are zero\n", "bp_df.loc[bp_df.BP_sixCohort_meta_p == 0, 'BP_sixCohort_meta_p'] = 1e-15\n", "bp_df['weight'] = bp_df.BP_sixCohort_meta_TE * -numpy.log10(bp_df.BP_sixCohort_meta_p)\n", "bp_df['weight_down'] = numpy.maximum(-bp_df.weight, 0)\n", "bp_df['weight_up'] = numpy.maximum(bp_df.weight, 0)\n", "\n", "bp_df.head(2)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " 1.0 68\n", "-1.0 65\n", "Name: weight, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pandas.Series(numpy.sign(bp_df.weight)).value_counts()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
entrez_gene_idgene_symbolweightweight_downweight_up
01A1BG0.00.00.0
12A2M0.00.00.0
\n", "
" ], "text/plain": [ " entrez_gene_id gene_symbol weight weight_down weight_up\n", "0 1 A1BG 0.0 0.0 0.0\n", "1 2 A2M 0.0 0.0 0.0" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gene_df = (\n", " pandas.DataFrame({\n", " 'entrez_gene_id': rows,\n", " 'gene_symbol': [graph.get_node((metapath.source().identifier, x)).name for x in rows],\n", " })\n", " .merge(bp_df, how='left')\n", " [['entrez_gene_id', 'gene_symbol', 'weight', 'weight_down', 'weight_up']]\n", " .fillna(0)\n", ")\n", "\n", "gene_df.head(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Compute target node scores" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
hetionet-v1.0hetionet-v1.0-perm-1hetionet-v1.0-perm-2hetionet-v1.0-perm-3hetionet-v1.0-perm-4hetionet-v1.0-perm-5z-score
metapathtarget_idtarget_name
GiGpBPGO:0051208sequestering of calcium ion1.2933320.0000000.0536560.0331420.0000000.01618755.307699
GO:0072236metanephric loop of Henle development1.2409330.0481210.0767810.0000000.0355370.00000036.759604
GO:0061299retina vasculature morphogenesis in camera-type eye0.8358320.0759490.1107480.0505620.0671460.05162031.135702
GO:0070426positive regulation of nucleotide-binding oligomerization domain containing signaling pathway1.7597140.0638970.0000000.0538670.0000000.14007429.612934
GO:0070318positive regulation of G0 to G1 transition1.3768720.1024260.0000000.0449120.0765640.00000029.166387
GO:0048936peripheral nervous system neuron axonogenesis1.8261850.1218040.1547610.0466650.0240110.00000026.615738
GO:0030316osteoclast differentiation1.4155080.5933210.5241890.5608690.5735320.51487126.017101
GO:0032464positive regulation of protein homooligomerization2.2608780.0000000.0838970.0173760.2068010.00000024.931638
GO:0090400stress-induced premature senescence1.3833770.0000000.0000000.1130390.0781980.09474524.720282
GO:1901099negative regulation of signal transduction in absence of ligand1.6584190.2867700.1651100.3136190.2650060.27372424.700407
GO:0030050vesicle transport along actin filament1.4574280.0000000.0242600.1271360.1177420.08036524.679673
GO:1903265positive regulation of tumor necrosis factor-mediated signaling pathway1.8212090.1027780.0000000.0000000.1745180.08179423.631223
GO:0071287cellular response to manganese ion1.6072770.2164930.1510720.2766800.1199770.16459823.068022
GO:0090435protein localization to nuclear envelope0.6348200.0480580.0345620.0689800.0262580.08905522.657843
GO:0036015response to interleukin-31.3328520.2170640.1152120.0861080.1658610.08595621.134876
GO:0033693neurofilament bundle assembly2.8109670.2849760.1675460.0000000.0000000.21510320.779777
GO:0052040modulation by symbiont of host programmed cell death1.3823600.0178870.0375020.0000000.1651820.07900720.106359
GO:2000644regulation of receptor catabolic process0.8081700.0949870.0188140.0343420.0000000.07471119.387848
GO:0002758innate immune response-activating signal transduction2.7780181.7895221.8398031.9000861.8577751.78176219.200993
GO:0097084vascular smooth muscle cell development2.3009400.0000000.1374580.1646080.0000000.27481218.646273
\n", "
" ], "text/plain": [ " hetionet-v1.0 \\\n", "metapath target_id target_name \n", "GiGpBP GO:0051208 sequestering of calcium ion 1.293332 \n", " GO:0072236 metanephric loop of Henle development 1.240933 \n", " GO:0061299 retina vasculature morphogenesis in camera-type... 0.835832 \n", " GO:0070426 positive regulation of nucleotide-binding oligo... 1.759714 \n", " GO:0070318 positive regulation of G0 to G1 transition 1.376872 \n", " GO:0048936 peripheral nervous system neuron axonogenesis 1.826185 \n", " GO:0030316 osteoclast differentiation 1.415508 \n", " GO:0032464 positive regulation of protein homooligomerization 2.260878 \n", " GO:0090400 stress-induced premature senescence 1.383377 \n", " GO:1901099 negative regulation of signal transduction in a... 1.658419 \n", " GO:0030050 vesicle transport along actin filament 1.457428 \n", " GO:1903265 positive regulation of tumor necrosis factor-me... 1.821209 \n", " GO:0071287 cellular response to manganese ion 1.607277 \n", " GO:0090435 protein localization to nuclear envelope 0.634820 \n", " GO:0036015 response to interleukin-3 1.332852 \n", " GO:0033693 neurofilament bundle assembly 2.810967 \n", " GO:0052040 modulation by symbiont of host programmed cell ... 1.382360 \n", " GO:2000644 regulation of receptor catabolic process 0.808170 \n", " GO:0002758 innate immune response-activating signal transd... 2.778018 \n", " GO:0097084 vascular smooth muscle cell development 2.300940 \n", "\n", " hetionet-v1.0-perm-1 \\\n", "metapath target_id target_name \n", "GiGpBP GO:0051208 sequestering of calcium ion 0.000000 \n", " GO:0072236 metanephric loop of Henle development 0.048121 \n", " GO:0061299 retina vasculature morphogenesis in camera-type... 0.075949 \n", " GO:0070426 positive regulation of nucleotide-binding oligo... 
0.063897 \n", " GO:0070318 positive regulation of G0 to G1 transition 0.102426 \n", " GO:0048936 peripheral nervous system neuron axonogenesis 0.121804 \n", " GO:0030316 osteoclast differentiation 0.593321 \n", " GO:0032464 positive regulation of protein homooligomerization 0.000000 \n", " GO:0090400 stress-induced premature senescence 0.000000 \n", " GO:1901099 negative regulation of signal transduction in a... 0.286770 \n", " GO:0030050 vesicle transport along actin filament 0.000000 \n", " GO:1903265 positive regulation of tumor necrosis factor-me... 0.102778 \n", " GO:0071287 cellular response to manganese ion 0.216493 \n", " GO:0090435 protein localization to nuclear envelope 0.048058 \n", " GO:0036015 response to interleukin-3 0.217064 \n", " GO:0033693 neurofilament bundle assembly 0.284976 \n", " GO:0052040 modulation by symbiont of host programmed cell ... 0.017887 \n", " GO:2000644 regulation of receptor catabolic process 0.094987 \n", " GO:0002758 innate immune response-activating signal transd... 1.789522 \n", " GO:0097084 vascular smooth muscle cell development 0.000000 \n", "\n", " hetionet-v1.0-perm-2 \\\n", "metapath target_id target_name \n", "GiGpBP GO:0051208 sequestering of calcium ion 0.053656 \n", " GO:0072236 metanephric loop of Henle development 0.076781 \n", " GO:0061299 retina vasculature morphogenesis in camera-type... 0.110748 \n", " GO:0070426 positive regulation of nucleotide-binding oligo... 0.000000 \n", " GO:0070318 positive regulation of G0 to G1 transition 0.000000 \n", " GO:0048936 peripheral nervous system neuron axonogenesis 0.154761 \n", " GO:0030316 osteoclast differentiation 0.524189 \n", " GO:0032464 positive regulation of protein homooligomerization 0.083897 \n", " GO:0090400 stress-induced premature senescence 0.000000 \n", " GO:1901099 negative regulation of signal transduction in a... 
0.165110 \n", " GO:0030050 vesicle transport along actin filament 0.024260 \n", " GO:1903265 positive regulation of tumor necrosis factor-me... 0.000000 \n", " GO:0071287 cellular response to manganese ion 0.151072 \n", " GO:0090435 protein localization to nuclear envelope 0.034562 \n", " GO:0036015 response to interleukin-3 0.115212 \n", " GO:0033693 neurofilament bundle assembly 0.167546 \n", " GO:0052040 modulation by symbiont of host programmed cell ... 0.037502 \n", " GO:2000644 regulation of receptor catabolic process 0.018814 \n", " GO:0002758 innate immune response-activating signal transd... 1.839803 \n", " GO:0097084 vascular smooth muscle cell development 0.137458 \n", "\n", " hetionet-v1.0-perm-3 \\\n", "metapath target_id target_name \n", "GiGpBP GO:0051208 sequestering of calcium ion 0.033142 \n", " GO:0072236 metanephric loop of Henle development 0.000000 \n", " GO:0061299 retina vasculature morphogenesis in camera-type... 0.050562 \n", " GO:0070426 positive regulation of nucleotide-binding oligo... 0.053867 \n", " GO:0070318 positive regulation of G0 to G1 transition 0.044912 \n", " GO:0048936 peripheral nervous system neuron axonogenesis 0.046665 \n", " GO:0030316 osteoclast differentiation 0.560869 \n", " GO:0032464 positive regulation of protein homooligomerization 0.017376 \n", " GO:0090400 stress-induced premature senescence 0.113039 \n", " GO:1901099 negative regulation of signal transduction in a... 0.313619 \n", " GO:0030050 vesicle transport along actin filament 0.127136 \n", " GO:1903265 positive regulation of tumor necrosis factor-me... 0.000000 \n", " GO:0071287 cellular response to manganese ion 0.276680 \n", " GO:0090435 protein localization to nuclear envelope 0.068980 \n", " GO:0036015 response to interleukin-3 0.086108 \n", " GO:0033693 neurofilament bundle assembly 0.000000 \n", " GO:0052040 modulation by symbiont of host programmed cell ... 
0.000000 \n", " GO:2000644 regulation of receptor catabolic process 0.034342 \n", " GO:0002758 innate immune response-activating signal transd... 1.900086 \n", " GO:0097084 vascular smooth muscle cell development 0.164608 \n", "\n", " hetionet-v1.0-perm-4 \\\n", "metapath target_id target_name \n", "GiGpBP GO:0051208 sequestering of calcium ion 0.000000 \n", " GO:0072236 metanephric loop of Henle development 0.035537 \n", " GO:0061299 retina vasculature morphogenesis in camera-type... 0.067146 \n", " GO:0070426 positive regulation of nucleotide-binding oligo... 0.000000 \n", " GO:0070318 positive regulation of G0 to G1 transition 0.076564 \n", " GO:0048936 peripheral nervous system neuron axonogenesis 0.024011 \n", " GO:0030316 osteoclast differentiation 0.573532 \n", " GO:0032464 positive regulation of protein homooligomerization 0.206801 \n", " GO:0090400 stress-induced premature senescence 0.078198 \n", " GO:1901099 negative regulation of signal transduction in a... 0.265006 \n", " GO:0030050 vesicle transport along actin filament 0.117742 \n", " GO:1903265 positive regulation of tumor necrosis factor-me... 0.174518 \n", " GO:0071287 cellular response to manganese ion 0.119977 \n", " GO:0090435 protein localization to nuclear envelope 0.026258 \n", " GO:0036015 response to interleukin-3 0.165861 \n", " GO:0033693 neurofilament bundle assembly 0.000000 \n", " GO:0052040 modulation by symbiont of host programmed cell ... 0.165182 \n", " GO:2000644 regulation of receptor catabolic process 0.000000 \n", " GO:0002758 innate immune response-activating signal transd... 1.857775 \n", " GO:0097084 vascular smooth muscle cell development 0.000000 \n", "\n", " hetionet-v1.0-perm-5 \\\n", "metapath target_id target_name \n", "GiGpBP GO:0051208 sequestering of calcium ion 0.016187 \n", " GO:0072236 metanephric loop of Henle development 0.000000 \n", " GO:0061299 retina vasculature morphogenesis in camera-type... 
0.051620 \n", " GO:0070426 positive regulation of nucleotide-binding oligo... 0.140074 \n", " GO:0070318 positive regulation of G0 to G1 transition 0.000000 \n", " GO:0048936 peripheral nervous system neuron axonogenesis 0.000000 \n", " GO:0030316 osteoclast differentiation 0.514871 \n", " GO:0032464 positive regulation of protein homooligomerization 0.000000 \n", " GO:0090400 stress-induced premature senescence 0.094745 \n", " GO:1901099 negative regulation of signal transduction in a... 0.273724 \n", " GO:0030050 vesicle transport along actin filament 0.080365 \n", " GO:1903265 positive regulation of tumor necrosis factor-me... 0.081794 \n", " GO:0071287 cellular response to manganese ion 0.164598 \n", " GO:0090435 protein localization to nuclear envelope 0.089055 \n", " GO:0036015 response to interleukin-3 0.085956 \n", " GO:0033693 neurofilament bundle assembly 0.215103 \n", " GO:0052040 modulation by symbiont of host programmed cell ... 0.079007 \n", " GO:2000644 regulation of receptor catabolic process 0.074711 \n", " GO:0002758 innate immune response-activating signal transd... 1.781762 \n", " GO:0097084 vascular smooth muscle cell development 0.274812 \n", "\n", " z-score \n", "metapath target_id target_name \n", "GiGpBP GO:0051208 sequestering of calcium ion 55.307699 \n", " GO:0072236 metanephric loop of Henle development 36.759604 \n", " GO:0061299 retina vasculature morphogenesis in camera-type... 31.135702 \n", " GO:0070426 positive regulation of nucleotide-binding oligo... 29.612934 \n", " GO:0070318 positive regulation of G0 to G1 transition 29.166387 \n", " GO:0048936 peripheral nervous system neuron axonogenesis 26.615738 \n", " GO:0030316 osteoclast differentiation 26.017101 \n", " GO:0032464 positive regulation of protein homooligomerization 24.931638 \n", " GO:0090400 stress-induced premature senescence 24.720282 \n", " GO:1901099 negative regulation of signal transduction in a... 
24.700407 \n", " GO:0030050 vesicle transport along actin filament 24.679673 \n", " GO:1903265 positive regulation of tumor necrosis factor-me... 23.631223 \n", " GO:0071287 cellular response to manganese ion 23.068022 \n", " GO:0090435 protein localization to nuclear envelope 22.657843 \n", " GO:0036015 response to interleukin-3 21.134876 \n", " GO:0033693 neurofilament bundle assembly 20.779777 \n", " GO:0052040 modulation by symbiont of host programmed cell ... 20.106359 \n", " GO:2000644 regulation of receptor catabolic process 19.387848 \n", " GO:0002758 innate immune response-activating signal transd... 19.200993 \n", " GO:0097084 vascular smooth muscle cell development 18.646273 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "target_df = pandas.DataFrame({\n", " 'metapath': str(metapath),\n", " 'target_id': cols,\n", " 'target_name': [graph.get_node((metapath.target().identifier, x)).name for x in cols],\n", "}).set_index(['metapath', 'target_id', 'target_name'])\n", "\n", "for name, array in DWPCs.items():\n", " target_df[name] = gene_df.weight_up @ array\n", "\n", "# Scaling as per https://think-lab.github.io/d/193/#4\n", "dwpc_scaler = target_df['hetionet-v1.0'].mean()\n", "target_df = numpy.arcsinh(target_df / dwpc_scaler)\n", "\n", "perm_df = target_df.iloc[:, 1:]\n", "target_df['z-score'] = (target_df.iloc[:, 0] - perm_df.mean(axis='columns')) / perm_df.std(axis='columns')\n", "\n", "(\n", " target_df\n", " # Remove targets without sufficient nonzero DWPCs\n", " [(perm_df > 0).sum(axis='columns') >= 3]\n", " .sort_values('z-score', ascending=False)\n", " .head(20)\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:hetmech]", "language": "python", "name": "conda-env-hetmech-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": 
"ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }