# Proof of concept DWWC matrix computation

In [1]:
import pandas
from neo4j.v1 import GraphDatabase
import hetio.readwrite
import hetio.neo4j
import hetio.pathtools

from hetmech.degree_weight import dwwc
from hetmech.matrix import get_node_to_position

In [2]:
# Hetionet v1.0 as bz2-compressed JSON. The URL embeds a full commit hash, so the
# input data is pinned to one exact revision of the repository.
url = 'https://github.com/dhimmel/hetionet/raw/76550e6c93fbe92124edc71725e8c7dd4ca8b1f5/hetnet/json/hetionet-v1.0.json.bz2'
# Read the hetnet from that URL (network access is required on a fresh run).
graph = hetio.readwrite.read_graph(url)
# Metagraph of the loaded graph; used below to resolve the metapath abbreviation.
metagraph = graph.metagraph

In [3]:
# Source/target pair used for every single-pair comparison in this notebook.
compound = 'DB01156'  # Bupropion
disease = 'DOID:0050742'  # nicotine dependence

damping_exponent = 0.4

# CbGpPWpGaD contains duplicate metanodes (G appears twice), so DWWC is not equivalent to DWPC
metapath = metagraph.metapath_from_abbrev('CbGpPWpGaD')
# Return value is discarded here (not the cell's last expression), so nothing is displayed.
metapath.get_unicode_str()

# identifier -> position lookups; the positions are used as matrix indices below
# (i indexes the compound axis, j the disease axis in output_mat[i, j]).
compound_to_position = {
    node.identifier: position
    for node, position in get_node_to_position(graph, 'Compound').items()
}
disease_to_position = {
    node.identifier: position
    for node, position in get_node_to_position(graph, 'Disease').items()
}
i = compound_to_position[compound]
j = disease_to_position[disease]

### Cypher DWPC implementation

In [4]:
%%time
query = hetio.neo4j.construct_dwpc_query(metapath, property='identifier', unique_nodes=True)
print(query)

driver = GraphDatabase.driver("bolt://neo4j.het.io")
params = {
 'source': compound,
 'target': disease,
 'w': damping_exponent,
}
with driver.session() as session:
 result = session.run(query, params)
 result = result.single()
result

MATCH path = (n0:Compound)-[:BINDS_CbG]-(n1)-[:PARTICIPATES_GpPW]-(n2)-[:PARTICIPATES_GpPW]-(n3)-[:ASSOCIATES_DaG]-(n4:Disease)
USING JOIN ON n2
WHERE n0.identifier = { source }
AND n4.identifier = { target }
AND n1 <> n3
WITH
[
size((n0)-[:BINDS_CbG]-()),
size(()-[:BINDS_CbG]-(n1)),
size((n1)-[:PARTICIPATES_GpPW]-()),
size(()-[:PARTICIPATES_GpPW]-(n2)),
size((n2)-[:PARTICIPATES_GpPW]-()),
size(()-[:PARTICIPATES_GpPW]-(n3)),
size((n3)-[:ASSOCIATES_DaG]-()),
size(()-[:ASSOCIATES_DaG]-(n4))
] AS degrees, path
RETURN
count(path) AS PC,
sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -{ w })) AS DWPC
CPU times: user 30 ms, sys: 7.99 ms, total: 38 ms
Wall time: 227 ms


In [5]:
# Unpack the single Cypher result row: path count and degree-weighted path count.
cypher_pc, cypher_dwpc = result['PC'], result['DWPC']
print(cypher_pc)
print(cypher_dwpc)

142
0.03287590886921623


### hetio DWPC implementation

In [6]:
%%time
compound_id = 'Compound', compound
disease_id = 'Disease', disease
hetio_paths = hetio.pathtools.paths_between(
 graph, 
 source=graph.node_dict[compound_id],
 target=graph.node_dict[disease_id],
 metapath=metapath,
 duplicates=False,
)

# Path count
print(len(hetio_paths))

# DWPC
hetio_dwpc = hetio.pathtools.DWPC(hetio_paths, damping_exponent=damping_exponent)

142
CPU times: user 178 ms, sys: 64 µs, total: 179 ms
Wall time: 179 ms


In [7]:
# Display the DWPC computed by hetio.pathtools for the compound-disease pair.
hetio_dwpc

0.03287590886921622

### hetmech DWPC implementation

In [8]:
from hetmech.path_count import dwpc

In [9]:
%%time
# Compute the DWPC matrix for the metapath over the whole graph in one call; the
# cells below read a single entry with [i, j] (compound position, disease position).
# NOTE(review): the meaning of the positional `False` is not visible in this notebook -
# confirm against hetmech.path_count.dwpc and consider passing it by keyword.
dwpc_matrix = dwpc(graph, metapath, damping_exponent, False)

def compare_dwpc(output_mat, i, j):
    """Print the DWPC at ``output_mat[i, j]`` beside the hetio and Cypher values.

    Args:
        output_mat: matrix exposing ``.shape`` and ``[i, j]`` indexing.
        i: first index of the entry to report.
        j: second index of the entry to report.

    Reads the notebook globals ``hetio_dwpc`` and ``cypher_dwpc``, so the cells
    that define them must have been run first. Returns None.
    """
    print("\nCOMPARE")
    print("dwpc_matrix shape {}".format(output_mat.shape))
    matrix_value = output_mat[i, j]
    print("dwpc from i to j, as computed here: {}".format(matrix_value))
    print("dwpc from i to j, as computed by hetio: {}".format(hetio_dwpc))
    print("dwpc from i to j, as computed by cypher: {}".format(cypher_dwpc))

def compare_pc(output_mat, i, j):
    """Print the path count at ``output_mat[i, j]`` beside the hetio and Cypher counts.

    Args:
        output_mat: matrix exposing ``.shape`` and ``[i, j]`` indexing.
        i: first index of the entry to report.
        j: second index of the entry to report.

    Reads the notebook globals ``hetio_paths`` (its length is the hetio count)
    and ``cypher_pc``, so the cells that define them must have been run first.
    Returns None.
    """
    print("\nCOMPARE")
    print("pc_matrix shape {}".format(output_mat.shape))
    matrix_count = output_mat[i, j]
    print("pc from i to j, as computed here: {}".format(matrix_count))
    hetio_count = len(hetio_paths)
    print("pc from i to j, as computed by hetio: {}".format(hetio_count))
    print("pc from i to j, as computed by cypher: {}".format(cypher_pc))
 
# Report the matrix entry for the chosen compound/disease against both references.
compare_dwpc(dwpc_matrix, i, j)


COMPARE
dwpc_matrix shape (1552, 137)
dwpc from i to j, as computed here: 0.032875908869216215
dwpc from i to j, as computed by hetio: 0.03287590886921622
dwpc from i to j, as computed by cypher: 0.03287590886921623
CPU times: user 3.32 s, sys: 250 ms, total: 3.57 s
Wall time: 3.58 s


In [10]:
%%time
# With a damping exponent of 0 every degree term is d ** -0 == 1, so each path
# contributes exactly 1 and the "DWPC" matrix is the plain path-count matrix.
# NOTE(review): this rebinds dwpc_matrix to a path-count matrix; a distinct name
# (e.g. pc_matrix) would avoid confusion if cells are re-run out of order.
dwpc_matrix = dwpc(graph, metapath, 0.00, False)

compare_pc(dwpc_matrix, i, j)


COMPARE
pc_matrix shape (1552, 137)
pc from i to j, as computed here: 142.0
pc from i to j, as computed by hetio: 142
pc from i to j, as computed by cypher: 142
CPU times: user 3.08 s, sys: 243 ms, total: 3.32 s
Wall time: 3.33 s


In [11]:
# Wall-clock timings copied by hand from an earlier run; they are NOT measured in
# this cell and will drift from the %%time output of the cells above on re-run.
# presumably the sum of the two all-pairs dwpc() calls (w=0.4 and w=0) - confirm
MATRIX_SECONDS = 3.43 + 3.56
# presumably the single-pair hetio paths_between + DWPC timing, in seconds - confirm
HETIO_SECONDS_PER_PAIR = .186

# Derive the number of (compound, disease) pairs from the matrix that was actually
# computed instead of hard-coding 1552*137.
n_compounds, n_diseases = dwpc_matrix.shape
n_pairs = n_compounds * n_diseases

print("Compare time for all-pairs computations via hetio, vs via matrix method")
print("Matrix method total time: {}s for all {} pairs dwpc".format(MATRIX_SECONDS, n_pairs))
# Extrapolation only: assumes every pair costs as much as the one pair timed above.
print("hetio method total time (estimated): {:6.0f}s for all {} pairs dwpc".format(HETIO_SECONDS_PER_PAIR * n_pairs, n_pairs))


Compare time for all-pairs computations via hetio, vs via matrix method
Matrix method total time: 6.99s for all 212624 pairs dwpc
hetio method total time (estimated): 39548s for all 212624 pairs dwpc


In [16]:
# Run hetmech's path-count tests; per the recorded output they print a
# preprocessing banner and one line per damping exponent tested.
# NOTE(review): this import sits outside the top import cell and the cell's execution
# count (16) is out of sequence - confirm the notebook passes Restart & Run All.
import hetmech.test_path_count
hetmech.test_path_count.test_all()

Preprocessing begin.
Preprocessing done.
Testing dwpc with exponent 0
Testing dwpc with exponent 0.2
Testing dwpc with exponent 0.4
Testing dwpc with exponent 0.7
Testing dwpc with exponent 1
