{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Implementation of diffusion hetmech" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas\n", "from neo4j.v1 import GraphDatabase\n", "import hetio.readwrite\n", "\n", "from hetmech.diffusion import diffuse" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0.json.bz2'\n", "graph = hetio.readwrite.read_graph(url)\n", "metagraph = graph.metagraph" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(11, 24)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# MetaGraph node/edge count\n", "metagraph.n_nodes, metagraph.n_edges" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(47031, 2250197)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Graph node/edge count\n", "graph.n_nodes, graph.n_edges" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gene_symbolgene_nameentrez_gene_idsources
0ABAT4-aminobutyrate aminotransferase18[DisGeNET]
1ABCB1ATP-binding cassette, sub-family B (MDR/TAP), ...5243[DISEASES, DOAF, DisGeNET]
2ABCC2ATP-binding cassette, sub-family C (CFTR/MRP),...1244[DisGeNET]
3ABCG2ATP-binding cassette, sub-family G (WHITE), me...9429[DisGeNET]
4ACKR4atypical chemokine receptor 451554[DISEASES]
\n", "
" ], "text/plain": [ " gene_symbol gene_name \\\n", "0 ABAT 4-aminobutyrate aminotransferase \n", "1 ABCB1 ATP-binding cassette, sub-family B (MDR/TAP), ... \n", "2 ABCC2 ATP-binding cassette, sub-family C (CFTR/MRP),... \n", "3 ABCG2 ATP-binding cassette, sub-family G (WHITE), me... \n", "4 ACKR4 atypical chemokine receptor 4 \n", "\n", " entrez_gene_id sources \n", "0 18 [DisGeNET] \n", "1 5243 [DISEASES, DOAF, DisGeNET] \n", "2 1244 [DisGeNET] \n", "3 9429 [DisGeNET] \n", "4 51554 [DISEASES] " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Uses the official neo4j-python-driver. See https://github.com/neo4j/neo4j-python-driver\n", "\n", "query = '''\n", "MATCH (disease:Disease)-[assoc:ASSOCIATES_DaG]-(gene:Gene)\n", "WHERE disease.name = 'epilepsy syndrome'\n", "RETURN\n", " gene.name AS gene_symbol,\n", " gene.description AS gene_name,\n", " gene.identifier AS entrez_gene_id,\n", " assoc.sources AS sources\n", "ORDER BY gene_symbol\n", "'''\n", "\n", "driver = GraphDatabase.driver(\"bolt://neo4j.het.io\")\n", "with driver.session() as session:\n", " result = session.run(query)\n", " gene_df = pandas.DataFrame((x.values() for x in result), columns=result.keys())\n", "\n", "gene_df.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "399" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "epilepsy_genes = list()\n", "for entrez_gene_id in gene_df.entrez_gene_id:\n", " node_id = 'Gene', entrez_gene_id\n", " node = graph.node_dict.get(node_id)\n", " if node:\n", " epilepsy_genes.append(node)\n", "len(epilepsy_genes)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "metapath = metagraph.metapath_from_abbrev('GiGpBP')\n", "source_node_weights = {gene: 1 for gene in epilepsy_genes}\n", "pathway_scores = diffuse(graph, metapath, source_node_weights, column_damping=1, 
row_damping=1)\n", "target_df = pandas.DataFrame(list(pathway_scores.items()), columns=['target_node', 'score'])\n", "target_df['target_name'] = target_df.target_node.map(lambda x: graph.node_dict[('Biological Process', x)].name)\n", "target_df = target_df.sort_values('score', ascending=False)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11381" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(target_df)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "353.7693384197814" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sum(target_df.score)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GiGpBP" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metapath" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
target_nodescoretarget_name
4751GO:00352351.091022ionotropic glutamate receptor signaling pathway
2530GO:00109921.038370ubiquitin homeostasis
1783GO:00075860.971243digestion
7663GO:00600810.948960membrane hyperpolarization
1485GO:00068950.907327Golgi to endosome transport
\n", "
" ], "text/plain": [ " target_node score target_name\n", "4751 GO:0035235 1.091022 ionotropic glutamate receptor signaling pathway\n", "2530 GO:0010992 1.038370 ubiquitin homeostasis\n", "1783 GO:0007586 0.971243 digestion\n", "7663 GO:0060081 0.948960 membrane hyperpolarization\n", "1485 GO:0006895 0.907327 Golgi to endosome transport" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "target_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Diagnosing ubiquitin homeostasis\n", "\n", "[ubiquitin homeostasis](http://amigo.geneontology.org/amigo/term/GO:0010992) ranks second in the table above, yet it contains only 3 genes: UBB, UBC, and IDE. The query below counts the interaction partners of each of these genes:\n", "\n", "```cypher\n", "MATCH (bp:BiologicalProcess)-[rel:PARTICIPATES_GpBP]-(gene)-[:INTERACTS_GiG]-(gene_target)\n", "WHERE bp.name = 'ubiquitin homeostasis'\n", "RETURN\n", " gene.name AS ubiquitin_homeostasis_gene,\n", " count(gene_target) AS n_interacting_genes\n", "```\n", "\n", "Returns the following table:\n", "\n", "| ubiquitin_homeostasis_gene | n_interacting_genes |\n", "|----------------------------|---------------------|\n", "| IDE | 243 |\n", "| UBC | 9371 |\n", "| UBB | 1040 |\n", "\n", "**TODO:** the counts above were recorded before the `:` type prefix was added to `INTERACTS_GiG` in the query (without the prefix, Cypher binds a variable that matches any relationship type); re-run the query to confirm them.\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:hetmech]", "language": "python", "name": "conda-env-hetmech-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }