{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Implementation of diffusion hetmech"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas\n",
"from neo4j.v1 import GraphDatabase\n",
"import hetio.readwrite\n",
"\n",
"from hetmech.diffusion import diffuse"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0.json.bz2'\n",
"graph = hetio.readwrite.read_graph(url)\n",
"metagraph = graph.metagraph"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(11, 24)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# MetaGraph node/edge count\n",
"metagraph.n_nodes, metagraph.n_edges"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(47031, 2250197)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Graph node/edge count\n",
"graph.n_nodes, graph.n_edges"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" gene_symbol | \n",
" gene_name | \n",
" entrez_gene_id | \n",
" sources | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" ABAT | \n",
" 4-aminobutyrate aminotransferase | \n",
" 18 | \n",
" [DisGeNET] | \n",
"
\n",
" \n",
" 1 | \n",
" ABCB1 | \n",
" ATP-binding cassette, sub-family B (MDR/TAP), ... | \n",
" 5243 | \n",
" [DISEASES, DOAF, DisGeNET] | \n",
"
\n",
" \n",
" 2 | \n",
" ABCC2 | \n",
" ATP-binding cassette, sub-family C (CFTR/MRP),... | \n",
" 1244 | \n",
" [DisGeNET] | \n",
"
\n",
" \n",
" 3 | \n",
" ABCG2 | \n",
" ATP-binding cassette, sub-family G (WHITE), me... | \n",
" 9429 | \n",
" [DisGeNET] | \n",
"
\n",
" \n",
" 4 | \n",
" ACKR4 | \n",
" atypical chemokine receptor 4 | \n",
" 51554 | \n",
" [DISEASES] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" gene_symbol gene_name \\\n",
"0 ABAT 4-aminobutyrate aminotransferase \n",
"1 ABCB1 ATP-binding cassette, sub-family B (MDR/TAP), ... \n",
"2 ABCC2 ATP-binding cassette, sub-family C (CFTR/MRP),... \n",
"3 ABCG2 ATP-binding cassette, sub-family G (WHITE), me... \n",
"4 ACKR4 atypical chemokine receptor 4 \n",
"\n",
" entrez_gene_id sources \n",
"0 18 [DisGeNET] \n",
"1 5243 [DISEASES, DOAF, DisGeNET] \n",
"2 1244 [DisGeNET] \n",
"3 9429 [DisGeNET] \n",
"4 51554 [DISEASES] "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Uses the official neo4j-python-driver. See https://github.com/neo4j/neo4j-python-driver\n",
"\n",
"query = '''\n",
"MATCH (disease:Disease)-[assoc:ASSOCIATES_DaG]-(gene:Gene)\n",
"WHERE disease.name = 'epilepsy syndrome'\n",
"RETURN\n",
" gene.name AS gene_symbol,\n",
" gene.description AS gene_name,\n",
" gene.identifier AS entrez_gene_id,\n",
" assoc.sources AS sources\n",
"ORDER BY gene_symbol\n",
"'''\n",
"\n",
"driver = GraphDatabase.driver(\"bolt://neo4j.het.io\")\n",
"with driver.session() as session:\n",
" result = session.run(query)\n",
" gene_df = pandas.DataFrame((x.values() for x in result), columns=result.keys())\n",
"\n",
"gene_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"399"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"epilepsy_genes = list()\n",
"for entrez_gene_id in gene_df.entrez_gene_id:\n",
" node_id = 'Gene', entrez_gene_id\n",
" node = graph.node_dict.get(node_id)\n",
" if node:\n",
" epilepsy_genes.append(node)\n",
"len(epilepsy_genes)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"metapath = metagraph.metapath_from_abbrev('GiGpBP')\n",
"source_node_weights = {gene: 1 for gene in epilepsy_genes}\n",
"pathway_scores = diffuse(graph, metapath, source_node_weights, column_damping=1, row_damping=1)\n",
"target_df = pandas.DataFrame(list(pathway_scores.items()), columns=['target_node', 'score'])\n",
"target_df['target_name'] = target_df.target_node.map(lambda x: graph.node_dict[('Biological Process', x)].name)\n",
"target_df = target_df.sort_values('score', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"11381"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(target_df)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"353.7693384197814"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sum(target_df.score)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"GiGpBP"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"metapath"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" target_node | \n",
" score | \n",
" target_name | \n",
"
\n",
" \n",
" \n",
" \n",
" 4751 | \n",
" GO:0035235 | \n",
" 1.091022 | \n",
" ionotropic glutamate receptor signaling pathway | \n",
"
\n",
" \n",
" 2530 | \n",
" GO:0010992 | \n",
" 1.038370 | \n",
" ubiquitin homeostasis | \n",
"
\n",
" \n",
" 1783 | \n",
" GO:0007586 | \n",
" 0.971243 | \n",
" digestion | \n",
"
\n",
" \n",
" 7663 | \n",
" GO:0060081 | \n",
" 0.948960 | \n",
" membrane hyperpolarization | \n",
"
\n",
" \n",
" 1485 | \n",
" GO:0006895 | \n",
" 0.907327 | \n",
" Golgi to endosome transport | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" target_node score target_name\n",
"4751 GO:0035235 1.091022 ionotropic glutamate receptor signaling pathway\n",
"2530 GO:0010992 1.038370 ubiquitin homeostasis\n",
"1783 GO:0007586 0.971243 digestion\n",
"7663 GO:0060081 0.948960 membrane hyperpolarization\n",
"1485 GO:0006895 0.907327 Golgi to endosome transport"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"target_df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Diagnosing ubiquitin homeostasis\n",
"\n",
"[ubiquitin homeostasis](http://amigo.geneontology.org/amigo/term/GO:0010992) contains 3 genes: [UBB, UBC, IDE]\n",
"\n",
"```cypher\n",
"MATCH (bp:BiologicalProcess)-[rel:PARTICIPATES_GpBP]-(gene)-[INTERACTS_GiG]-(gene_target)\n",
"WHERE bp.name ='ubiquitin homeostasis'\n",
"RETURN\n",
" gene.name AS ubiquitin_homeostasis_gene,\n",
" count(gene_target) AS n_interacting_genes\n",
"```\n",
"\n",
"Returns the following table:\n",
"\n",
"| ubiquitin_homeostasis_gene | n_interacting_genes |\n",
"|----------------------------|---------------------|\n",
"| IDE | 243 |\n",
"| UBC | 9371 |\n",
"| UBB | 1040 |\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:hetmech]",
"language": "python",
"name": "conda-env-hetmech-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}