{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Collapsed source/target edge contributions to epilepsy predictions" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import re\n", "import itertools\n", "import collections\n", "\n", "import pandas\n", "\n", "from utilities import tidy_split" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Read Project Rephetio DrugBank Info\n", "url = 'https://github.com/dhimmel/drugbank/raw/7b94454b14a2fa4bb9387cb3b4b9924619cfbd3e/data/drugbank-slim.tsv'\n", "drugbank_df = (pandas.read_table(url)\n", " .rename(columns={'drugbank_id': 'compound_id', 'name': 'compound_name'})\n", " [['compound_id', 'compound_name', 'atc_codes', 'categories']]\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
compound_namedisease_pctlphcodbcompound_idatc_codescategories
0Topiramate1.0000DMDB00273N03AX11Anticonvulsants|Anti-Obesity Agents|Neuroprote...
1Ethotoin0.9993NaNDB00754N03AB01Anticonvulsants
\n", "
" ], "text/plain": [ " compound_name disease_pctl phcodb compound_id atc_codes \\\n", "0 Topiramate 1.0000 DM DB00273 N03AX11 \n", "1 Ethotoin 0.9993 NaN DB00754 N03AB01 \n", "\n", " categories \n", "0 Anticonvulsants|Anti-Obesity Agents|Neuroprote... \n", "1 Anticonvulsants " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read top epilepsy predictions\n", "top_compounds_df = (pandas.read_table('./data/windows.tsv')\n", " .rename(columns={'name': 'compound_name'})\n", " [['compound_name', 'disease_pctl', 'phcodb']]\n", " .merge(drugbank_df)\n", ")\n", "top_compounds_df.head(2)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
nodespercent_of_predictionpercent_of_DWPCsource_edgetarget_edgemetapath
0Topiramate—migraine—epilepsy syndrome0.17801.000Topiramate—treats—migraineepilepsy syndrome—resembles—migraineCtDrD
1Topiramate—GRIK5—epilepsy syndrome0.03850.249Topiramate—binds—GRIK5epilepsy syndrome—associates—GRIK5CbGaD
\n", "
" ], "text/plain": [ " nodes percent_of_prediction \\\n", "0 Topiramate—migraine—epilepsy syndrome 0.1780 \n", "1 Topiramate—GRIK5—epilepsy syndrome 0.0385 \n", "\n", " percent_of_DWPC source_edge \\\n", "0 1.000 Topiramate—treats—migraine \n", "1 0.249 Topiramate—binds—GRIK5 \n", "\n", " target_edge metapath \n", "0 epilepsy syndrome—resembles—migraine CtDrD \n", "1 epilepsy syndrome—associates—GRIK5 CbGaD " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path_dfs = list()\n", "for compound_id in top_compounds_df.compound_id:\n", " path = '../../het.io-rep-data/prediction-info/{}/DOID_1826/paths.tsv'.format(compound_id)\n", " path_dfs.append(pandas.read_table(path))\n", "path_df = pandas.concat(path_dfs)\n", "path_df.head(2)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def summarize(df):\n", " s = pandas.Series()\n", " s['paths'] = len(df)\n", " s['contribution'] = sum(df.percent_of_prediction)\n", " return s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Metapath contributions" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
metapathpathscontribution
1CbGbCtD6358.020.623795
10CrCtD160.018.265600
\n", "
" ], "text/plain": [ " metapath paths contribution\n", "1 CbGbCtD 6358.0 20.623795\n", "10 CrCtD 160.0 18.265600" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metapath_df = (path_df\n", " .groupby('metapath')\n", " .apply(summarize).reset_index()\n", " .sort_values('contribution', ascending=False)\n", ")\n", "metapath_df.head(2)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "12" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metapath_df.to_csv('data/metapath-contributions.tsv', sep='\\t', index=False, float_format='%.5g')\n", "len(metapath_df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Source edge contributions" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
source_edgepathscontribution
1437Compound—includes—Decreased Central Nervous Sy...238.06.341200
1429Compound—includes—Benzodiazepines52.03.844600
104Compound—binds—GABRA112385.02.819223
1519Compound—resembles—Diazepam402.02.708075
1438Compound—includes—General Anesthesia6.02.456000
\n", "
" ], "text/plain": [ " source_edge paths contribution\n", "1437 Compound—includes—Decreased Central Nervous Sy... 238.0 6.341200\n", "1429 Compound—includes—Benzodiazepines 52.0 3.844600\n", "104 Compound—binds—GABRA1 12385.0 2.819223\n", "1519 Compound—resembles—Diazepam 402.0 2.708075\n", "1438 Compound—includes—General Anesthesia 6.0 2.456000" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "source_df = (path_df\n", " .assign(source_edge = path_df.source_edge.map(lambda x: 'Compound—' + x.split('—', 1)[1]))\n", " .groupby('source_edge')\n", " .apply(summarize).reset_index()\n", " .sort_values('contribution', ascending=False)\n", ")\n", "source_df.head()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1667" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "source_df.to_csv('data/source-edge-contributions.tsv', sep='\\t', index=False, float_format='%.5g')\n", "len(source_df)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
source_edgepathscontribution
0Compound—binds266192.043.778627
4Compound—resembles8507.030.306896
2Compound—includes322.015.050900
3Compound—palliates212.05.319900
1Compound—causes117724.04.418177
5Compound—treats5.01.130000
\n", "
" ], "text/plain": [ " source_edge paths contribution\n", "0 Compound—binds 266192.0 43.778627\n", "4 Compound—resembles 8507.0 30.306896\n", "2 Compound—includes 322.0 15.050900\n", "3 Compound—palliates 212.0 5.319900\n", "1 Compound—causes 117724.0 4.418177\n", "5 Compound—treats 5.0 1.130000" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Source metaedge contributions\n", "(path_df\n", " .assign(source_edge = path_df.source_edge.map(lambda x: 'Compound—' + x.split('—')[1]))\n", " .groupby('source_edge')\n", " .apply(summarize).reset_index()\n", " .sort_values('contribution', ascending=False)\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Target edge contributions" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
target_edgepathscontribution
355epilepsy syndrome—treats—Diazepam6843.08.123404
354epilepsy syndrome—treats—Clonazepam6488.06.273890
362epilepsy syndrome—treats—Midazolam4832.06.116992
353epilepsy syndrome—treats—Clobazam4159.05.670810
351epilepsy syndrome—treats—Amobarbital2002.04.840363
\n", "
" ], "text/plain": [ " target_edge paths contribution\n", "355 epilepsy syndrome—treats—Diazepam 6843.0 8.123404\n", "354 epilepsy syndrome—treats—Clonazepam 6488.0 6.273890\n", "362 epilepsy syndrome—treats—Midazolam 4832.0 6.116992\n", "353 epilepsy syndrome—treats—Clobazam 4159.0 5.670810\n", "351 epilepsy syndrome—treats—Amobarbital 2002.0 4.840363" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "target_df = (path_df\n", " .groupby('target_edge')\n", " .apply(summarize).reset_index()\n", " .sort_values('contribution', ascending=False)\n", ")\n", "target_df.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "375" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "target_df.to_csv('data/target-edge-contributions.tsv', sep='\\t', index=False, float_format='%.5g')\n", "len(target_df)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
target_edgepathscontribution
3epilepsy syndrome—treats127343.075.582070
0epilepsy syndrome—associates255092.021.689669
1epilepsy syndrome—localizes10522.01.602761
2epilepsy syndrome—resembles5.01.130000
\n", "
" ], "text/plain": [ " target_edge paths contribution\n", "3 epilepsy syndrome—treats 127343.0 75.582070\n", "0 epilepsy syndrome—associates 255092.0 21.689669\n", "1 epilepsy syndrome—localizes 10522.0 1.602761\n", "2 epilepsy syndrome—resembles 5.0 1.130000" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Source metaedge contributions\n", "(path_df\n", " .assign(target_edge = path_df.target_edge.map(lambda x: 'epilepsy syndrome—' + x.split('—')[-2]))\n", " .groupby('target_edge')\n", " .apply(summarize).reset_index()\n", " .sort_values('contribution', ascending=False)\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Anatomy (intermediate node) contributions" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
anatomypathscontribution
21telencephalon928.00.153357
7forebrain834.00.147358
\n", "
" ], "text/plain": [ " anatomy paths contribution\n", "21 telencephalon 928.0 0.153357\n", "7 forebrain 834.0 0.147358" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "anatomy_df = path_df.query(\"metapath == 'CbGeAlD'\").copy()\n", "anatomy_df['anatomy'] = anatomy_df.nodes.map(lambda x: x.split('—')[2])\n", "anatomy_df = (anatomy_df\n", " .groupby('anatomy')\n", " .apply(summarize).reset_index()\n", " .sort_values('contribution', ascending=False)\n", ")\n", "anatomy_df.head(2)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "24" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "anatomy_df.to_csv('data/anatomy-node-contributions.tsv', sep='\\t', index=False, float_format='%.5g')\n", "len(anatomy_df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Classes of predicted compounds" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def get_counts(df):\n", " s = pandas.Series()\n", " s['count'] = len(df)\n", " s['compounds'] = ', '.join(sorted(df.compound_name))\n", " return s" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Third-level ATC Codes" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "91 compounds have at least 1 ATC code\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
atc_codeatc_namecountcompounds
10N03Aantiepileptics25Carbamazepine, Clonazepam, Ethosuximide, Ethot...
13N05Chypnotics and sedatives21Amobarbital, Aprobarbital, Cinolazepam, Estazo...
\n", "
" ], "text/plain": [ " atc_code atc_name count \\\n", "10 N03A antiepileptics 25 \n", "13 N05C hypnotics and sedatives 21 \n", "\n", " compounds \n", "10 Carbamazepine, Clonazepam, Ethosuximide, Ethot... \n", "13 Amobarbital, Aprobarbital, Cinolazepam, Estazo... " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Anatomical Therapeutic Chemical Classification System\n", "\n", "# Read ATC Code to name mapping http://biology.stackexchange.com/a/55023/28907\n", "url = 'https://github.com/OHDSI/Vocabulary-v4.5/raw/661804cf3c17add61b02e2e83e477f48acb011d5/21-ATC/atc_code.txt'\n", "atc_df = (pandas.read_table(url)\n", " .rename(columns={'Code': 'atc_code', 'Description': 'atc_name'})\n", ")\n", "df = tidy_split(top_compounds_df, 'atc_codes')\n", "print('{} compounds have at least 1 ATC code'.format(df.compound_name.nunique()))\n", "\n", "# The third level of the code indicates the therapeutic/pharmacological subgroup and consists of one letter.\n", "df['atc_code'] = df.pop('atc_codes').str.slice(0, 4)\n", "df = df.drop_duplicates()\n", "df = df.merge(atc_df, how='left')\n", "df.atc_name = df.atc_name.str.lower()\n", "df = df.groupby(['atc_code', 'atc_name']).apply(get_counts).reset_index().sort_values('count', ascending=False)\n", "df.to_csv('data/compounds-third-level-atc-codes.tsv', sep='\\t', index=False, float_format='%.5g')\n", "df.head(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### DrugBank Categories" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "84 compounds have at least 1 category\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
categorycountcompounds
16Anticonvulsants28Acetazolamide, Carbamazepine, Clobazam, Clonaz...
46Hypnotics and Sedatives24Alprazolam, Butabarbital, Butethal, Chlordiaze...
\n", "
" ], "text/plain": [ " category count \\\n", "16 Anticonvulsants 28 \n", "46 Hypnotics and Sedatives 24 \n", "\n", " compounds \n", "16 Acetazolamide, Carbamazepine, Clobazam, Clonaz... \n", "46 Alprazolam, Butabarbital, Butethal, Chlordiaze... " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = tidy_split(top_compounds_df, 'categories')\n", "print('{} compounds have at least 1 category'.format(df.compound_name.nunique()))\n", "df = df.rename(columns={'categories': 'category'})\n", "df = df.groupby(['category']).apply(get_counts).reset_index().sort_values('count', ascending=False)\n", "df.to_csv('data/compounds-categories.tsv', sep='\\t', index=False, float_format='%.5g')\n", "df.head(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### DurgCentral Pharmacologic Classes" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
compound_idclass_idclass_nameclass_sourceclass_type
0DB00126CHEBI:21241vitamin CCHEBIApplication
1DB00676CHEBI:22153acaricideCHEBIApplication
\n", "
" ], "text/plain": [ " compound_id class_id class_name class_source class_type\n", "0 DB00126 CHEBI:21241 vitamin C CHEBI Application\n", "1 DB00676 CHEBI:22153 acaricide CHEBI Application" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "class_url = 'https://github.com/dhimmel/drugcentral/raw/e80a0c966a53ce48650d98069b126801c2793517/rephetio/classes.tsv'\n", "class_rel_url = 'https://github.com/dhimmel/drugcentral/raw/e80a0c966a53ce48650d98069b126801c2793517/rephetio/drug-to-class.tsv'\n", "class_df = (pandas.read_table(class_url)\n", " .merge(pandas.read_table(class_rel_url))\n", " .rename(columns={'drugbank_id': 'compound_id'})\n", " [['compound_id', 'class_id', 'class_name', 'class_source', 'class_type']]\n", ")\n", "\n", "class_df.head(2)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "92 compounds have at least 1 pharmacologic class\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
class_idclass_nameclass_sourceclass_typecountcompounds
89D002491Central Nervous System AgentsMeSHPharmacological Action76Acamprosate, Acetazolamide, Adinazolam, Alpraz...
90D002492Central Nervous System DepressantsMeSHPharmacological Action46Alprazolam, Amobarbital, Bromazepam, Butabarbi...
\n", "
" ], "text/plain": [ " class_id class_name class_source \\\n", "89 D002491 Central Nervous System Agents MeSH \n", "90 D002492 Central Nervous System Depressants MeSH \n", "\n", " class_type count \\\n", "89 Pharmacological Action 76 \n", "90 Pharmacological Action 46 \n", "\n", " compounds \n", "89 Acamprosate, Acetazolamide, Adinazolam, Alpraz... \n", "90 Alprazolam, Amobarbital, Bromazepam, Butabarbi... " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = top_compounds_df.merge(class_df)\n", "print('{} compounds have at least 1 pharmacologic class'.format(df.compound_name.nunique()))\n", "df = df.groupby(['class_id', 'class_name', 'class_source', 'class_type']).apply(get_counts).reset_index().sort_values('count', ascending=False)\n", "df.to_csv('data/compounds-pharmacologic-classes.tsv', sep='\\t', index=False, float_format='%.5g')\n", "df.head(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Contribution by gene groups" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Read Entrez Gene\n", "url = 'https://github.com/dhimmel/entrez-gene/raw/a7362748a34211e5df6f2d185bb3246279760546/data/genes-human.tsv'\n", "gene_df = pandas.read_table(url)\n", "symbol_to_name = dict(zip(gene_df.Symbol, gene_df.description))" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Split on comma not in parenthesis. 
http://stackoverflow.com/a/26634150/4651668\n", "gene_split = re.compile(r',\\s*(?![^()]*\\))')\n", "\n", "def summarize_gene_group(df):\n", "    \"\"\"\n", "    Collapse the edge rows sharing one gene_main_name into a single Series.\n", "\n", "    paths / contribution: totals of the per-edge `paths` and `contribution` columns.\n", "    gene_symbols: the group's `gene_symbol` values joined with ', ', in row order.\n", "\n", "    Named distinctly so that it does not shadow the path-level summarize()\n", "    defined earlier in this notebook, which the earlier groupby cells call.\n", "    \"\"\"\n", "    values = [sum(df.paths), sum(df.contribution), ', '.join(df.gene_symbol)]\n", "    return pandas.Series(values, index=['paths', 'contribution', 'gene_symbols'], dtype=object)\n", "\n", "def contributions_by_gene(df, edge_column):\n", "    \"\"\"\n", "    Total the edge contributions per gene group.\n", "\n", "    df: edge-level frame with `paths`, `contribution` and `edge_column` columns.\n", "    edge_column: name of the column of '—'-delimited edge strings; the last\n", "        field of each string is taken as the gene symbol.\n", "\n", "    A gene's group is the part of its symbol_to_name description before the first\n", "    comma outside parentheses. A symbol without a string description is grouped\n", "    under the symbol itself instead of raising inside gene_split.split().\n", "    Returns one row per group, sorted by decreasing contribution.\n", "    `df` itself is not modified.\n", "    \"\"\"\n", "    gene_symbol = df[edge_column].map(lambda x: x.split('—')[-1])\n", "    # Symbols absent from symbol_to_name map to NaN here.\n", "    gene_name = gene_symbol.map(symbol_to_name)\n", "    gene_main_name = [\n", "        gene_split.split(name, 1)[0] if isinstance(name, str) else symbol\n", "        for symbol, name in zip(gene_symbol, gene_name)\n", "    ]\n", "    # assign() returns a new frame, so the caller's frame keeps its columns.\n", "    gene_df = df.assign(gene_symbol=gene_symbol, gene_name=gene_name, gene_main_name=gene_main_name)\n", "    return (gene_df\n", "        .groupby('gene_main_name')\n", "        .apply(summarize_gene_group).reset_index()\n", "        .sort_values('contribution', ascending=False)\n", "    )" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gene_main_namepathscontributiongene_symbols
74gamma-aminobutyric acid (GABA) A receptor91967.015.329433GABRA1, GABRG2, GABRB2, GABRA5, GABRB3, GABRD,...
66cytochrome P45034323.05.585686CYP2C19, CYP3A4, CYP2E1, CYP2B6, CYP2C8, CYP2C...
\n", "
" ], "text/plain": [ " gene_main_name paths contribution \\\n", "74 gamma-aminobutyric acid (GABA) A receptor 91967.0 15.329433 \n", "66 cytochrome P450 34323.0 5.585686 \n", "\n", " gene_symbols \n", "74 GABRA1, GABRG2, GABRB2, GABRA5, GABRB3, GABRD,... \n", "66 CYP2C19, CYP3A4, CYP2E1, CYP2B6, CYP2C8, CYP2C... " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "source_bind_df = source_df[source_df.source_edge.str.startswith('Compound—binds—')].copy()\n", "source_bind_df = contributions_by_gene(source_bind_df, 'source_edge')\n", "source_bind_df.to_csv('data/source-edge-binds-contributions.tsv', sep='\\t', index=False, float_format='%.5g')\n", "source_bind_df.head(2)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gene_main_namepathscontributiongene_symbols
119gamma-aminobutyric acid (GABA) A receptor23347.06.834028GABRG2, GABRA1, GABRA5, GABRB3, GABRB2, GABRD
128glutamate receptor20142.02.284863GRIA2, GRIK2, GRIN2A, GRIN2B, GRM5, GRIA1, GRI...
\n", "
" ], "text/plain": [ " gene_main_name paths contribution \\\n", "119 gamma-aminobutyric acid (GABA) A receptor 23347.0 6.834028 \n", "128 glutamate receptor 20142.0 2.284863 \n", "\n", " gene_symbols \n", "119 GABRG2, GABRA1, GABRA5, GABRB3, GABRB2, GABRD \n", "128 GRIA2, GRIK2, GRIN2A, GRIN2B, GRM5, GRIA1, GRI... " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "target_bind_df = target_df[target_df.target_edge.str.contains('—associates—')].copy()\n", "target_bind_df = contributions_by_gene(target_bind_df, 'target_edge')\n", "target_bind_df.to_csv('data/target-edge-associates-contributions.tsv', sep='\\t', index=False, float_format='%.5g')\n", "target_bind_df.head(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Contribution by Side Effect (source edges)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
source_edgepathscontribution
390Compound—causes—Ataxia1312.00.069241
1057Compound—causes—Nystagmus611.00.048500
579Compound—causes—Diplopia948.00.044986
1278Compound—causes—Somnolence1577.00.043543
1416Compound—causes—Vomiting1777.00.042753
\n", "
" ], "text/plain": [ " source_edge paths contribution\n", "390 Compound—causes—Ataxia 1312.0 0.069241\n", "1057 Compound—causes—Nystagmus 611.0 0.048500\n", "579 Compound—causes—Diplopia 948.0 0.044986\n", "1278 Compound—causes—Somnolence 1577.0 0.043543\n", "1416 Compound—causes—Vomiting 1777.0 0.042753" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "side_effect_df = source_df[source_df.source_edge.str.contains('Compound—causes—')]\n", "side_effect_df.head()" ] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }