{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Proof of concept DWWC matrix computation\n",
    "\n",
    "Computes the degree-weighted path count (DWPC) between one compound and one disease along the `CbGpPWpGaD` metapath in three ways: a Cypher query against Neo4j, path enumeration with hetio, and the hetmech matrix implementation. The three values are then printed side by side."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas\n",
    "from neo4j.v1 import GraphDatabase\n",
    "import hetio.readwrite\n",
    "import hetio.neo4j\n",
    "import hetio.pathtools\n",
    "\n",
    "from hetmech.degree_weight import dwwc\n",
    "from hetmech.matrix import get_node_to_position\n",
    "from hetmech.path_count import dwpc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0.json.bz2'\n",
    "graph = hetio.readwrite.read_graph(url)\n",
    "metagraph = graph.metagraph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "compound = 'DB01156'  # Bupropion\n",
    "disease = 'DOID:0050742'  # nicotine dependence\n",
    "\n",
    "damping_exponent = 0.4\n",
    "\n",
    "# CbGpPWpGaD contains duplicate metanodes, so DWPC is not equivalent to DWWC\n",
    "metapath = metagraph.metapath_from_abbrev('CbGpPWpGaD')\n",
    "metapath.get_unicode_str()\n",
    "\n",
    "# Map node identifiers to their positions, used below as matrix indices\n",
    "compound_to_position = {\n",
    "    node.identifier: position\n",
    "    for node, position in get_node_to_position(graph, 'Compound').items()\n",
    "}\n",
    "disease_to_position = {\n",
    "    node.identifier: position\n",
    "    for node, position in get_node_to_position(graph, 'Disease').items()\n",
    "}\n",
    "i = compound_to_position[compound]\n",
    "j = disease_to_position[disease]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Cypher DWPC implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MATCH path = (n0:Compound)-[:BINDS_CbG]-(n1)-[:PARTICIPATES_GpPW]-(n2)-[:PARTICIPATES_GpPW]-(n3)-[:ASSOCIATES_DaG]-(n4:Disease)\n",
      "USING JOIN ON n2\n",
      "WHERE n0.identifier = { source }\n",
      "AND n4.identifier = { target }\n",
      "AND n1 <> n3\n",
      "WITH\n",
      "[\n",
      "size((n0)-[:BINDS_CbG]-()),\n",
      "size(()-[:BINDS_CbG]-(n1)),\n",
      "size((n1)-[:PARTICIPATES_GpPW]-()),\n",
      "size(()-[:PARTICIPATES_GpPW]-(n2)),\n",
      "size((n2)-[:PARTICIPATES_GpPW]-()),\n",
      "size(()-[:PARTICIPATES_GpPW]-(n3)),\n",
      "size((n3)-[:ASSOCIATES_DaG]-()),\n",
      "size(()-[:ASSOCIATES_DaG]-(n4))\n",
      "] AS degrees, path\n",
      "RETURN\n",
      "count(path) AS PC,\n",
      "sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -{ w })) AS DWPC\n",
      "CPU times: user 30 ms, sys: 7.99 ms, total: 38 ms\n",
      "Wall time: 227 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "query = hetio.neo4j.construct_dwpc_query(metapath, property='identifier', unique_nodes=True)\n",
    "print(query)\n",
    "\n",
    "params = {\n",
    "    'source': compound,\n",
    "    'target': disease,\n",
    "    'w': damping_exponent,\n",
    "}\n",
    "driver = GraphDatabase.driver(\"bolt://neo4j.het.io\")\n",
    "try:\n",
    "    with driver.session() as session:\n",
    "        result = session.run(query, params).single()\n",
    "finally:\n",
    "    # Close the driver so its connections are released even if the query fails\n",
    "    driver.close()\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "142\n",
      "0.03287590886921623\n"
     ]
    }
   ],
   "source": [
    "cypher_pc = result['PC']\n",
    "print(cypher_pc)\n",
    "cypher_dwpc = result['DWPC']\n",
    "print(cypher_dwpc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### hetio DWPC implementation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "142\n",
      "CPU times: user 178 ms, sys: 64 µs, total: 179 ms\n",
      "Wall time: 179 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "compound_id = 'Compound', compound\n",
    "disease_id = 'Disease', disease\n",
    "hetio_paths = hetio.pathtools.paths_between(\n",
    "    graph,\n",
    "    source=graph.node_dict[compound_id],\n",
    "    target=graph.node_dict[disease_id],\n",
    "    metapath=metapath,\n",
    "    duplicates=False,\n",
    ")\n",
    "\n",
    "# Path count\n",
    "print(len(hetio_paths))\n",
    "\n",
    "# DWPC\n",
    "hetio_dwpc = hetio.pathtools.DWPC(hetio_paths, damping_exponent=damping_exponent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.03287590886921622"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hetio_dwpc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "### HetMech dwpc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compare_dwpc(output_mat, i, j):\n",
    "    \"\"\"Print the DWPC stored at output_mat[i, j] next to the hetio and Cypher values.\n",
    "\n",
    "    Reads the notebook-level variables hetio_dwpc and cypher_dwpc, so the\n",
    "    Cypher and hetio cells above must have been run first.\n",
    "    \"\"\"\n",
    "    print(\"\\nCOMPARE\")\n",
    "    print(\"dwpc_matrix shape {}\".format(output_mat.shape))\n",
    "    print(\"dwpc from i to j, as computed here: {}\".format(output_mat[i, j]))\n",
    "    print(\"dwpc from i to j, as computed by hetio: {}\".format(hetio_dwpc))\n",
    "    print(\"dwpc from i to j, as computed by cypher: {}\".format(cypher_dwpc))\n",
    "\n",
    "\n",
    "def compare_pc(output_mat, i, j):\n",
    "    \"\"\"Print the path count stored at output_mat[i, j] next to the hetio and Cypher values.\n",
    "\n",
    "    Reads the notebook-level variables hetio_paths and cypher_pc, so the\n",
    "    Cypher and hetio cells above must have been run first.\n",
    "    \"\"\"\n",
    "    print(\"\\nCOMPARE\")\n",
    "    print(\"pc_matrix shape {}\".format(output_mat.shape))\n",
    "    print(\"pc from i to j, as computed here: {}\".format(output_mat[i, j]))\n",
    "    print(\"pc from i to j, as computed by hetio: {}\".format(len(hetio_paths)))\n",
    "    print(\"pc from i to j, as computed by cypher: {}\".format(cypher_pc))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "COMPARE\n",
      "dwpc_matrix shape (1552, 137)\n",
      "dwpc from i to j, as computed here: 0.032875908869216215\n",
      "dwpc from i to j, as computed by hetio: 0.03287590886921622\n",
      "dwpc from i to j, as computed by cypher: 0.03287590886921623\n",
      "CPU times: user 3.32 s, sys: 250 ms, total: 3.57 s\n",
      "Wall time: 3.58 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "dwpc_matrix = dwpc(graph, metapath, damping_exponent, False)\n",
    "\n",
    "compare_dwpc(dwpc_matrix, i, j)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "COMPARE\n",
      "pc_matrix shape (1552, 137)\n",
      "pc from i to j, as computed here: 142.0\n",
      "pc from i to j, as computed by hetio: 142\n",
      "pc from i to j, as computed by cypher: 142\n",
      "CPU times: user 3.08 s, sys: 243 ms, total: 3.32 s\n",
      "Wall time: 3.33 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# With a damping exponent of 0 every path has weight 1, so the result is the plain path count.\n",
    "# It gets its own name so the DWPC matrix computed above is not overwritten.\n",
    "pc_matrix = dwpc(graph, metapath, 0.00, False)\n",
    "\n",
    "compare_pc(pc_matrix, i, j)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Compare time for all-pairs computations via hetio, vs via matrix method\n",
      "Matrix method total time: 6.99s for all 212624 pairs dwpc\n",
      "hetio method total time (estimated): 39548s for all 212624 pairs dwpc\n"
     ]
    }
   ],
   "source": [
    "# Wall-clock timings in seconds, transcribed by hand from the %%time output of an\n",
    "# earlier run. This cell does not measure them, so update them after re-running.\n",
    "matrix_seconds = 3.43 + 3.56  # the two all-pairs dwpc() calls above\n",
    "hetio_seconds_per_pair = .186  # the single-pair hetio cell above\n",
    "\n",
    "# Take the number of compound-disease pairs from the matrix instead of hard-coding it\n",
    "n_rows, n_cols = dwpc_matrix.shape\n",
    "n_pairs = n_rows * n_cols\n",
    "\n",
    "print(\"Compare time for all-pairs computations via hetio, vs via matrix method\")\n",
    "print(\"Matrix method total time: {}s for all {} pairs dwpc\".format(matrix_seconds, n_pairs))\n",
    "print(\"hetio method total time (estimated): {:6.0f}s for all {} pairs dwpc\".format(hetio_seconds_per_pair * n_pairs, n_pairs))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preprocessing begin.\n",
      "Preprocessing done.\n",
      "Testing dwpc with exponent 0\n",
      "Testing dwpc with exponent 0.2\n",
      "Testing dwpc with exponent 0.4\n",
      "Testing dwpc with exponent 0.7\n",
      "Testing dwpc with exponent 1\n"
     ]
    }
   ],
   "source": [
    "import hetmech.test_path_count\n",
    "hetmech.test_path_count.test_all()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:hetmech]",
   "language": "python",
   "name": "conda-env-hetmech-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}