{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Permuted filenames: ['hetnet_perm-1.json.bz2', 'hetnet_perm-2.json.bz2', 'hetnet_perm-3.json.bz2', 'hetnet_perm-4.json.bz2', 'hetnet_perm-5.json.bz2']\n", "Starting Neo4j Server...WARNING: not changing user\n", "process [11964]... waiting for server to be ready..Starting Neo4j Server...WARNING: not changing user\n", "process [12181]... waiting for server to be ready...Starting Neo4j Server...WARNING: not changing user\n", "process [12406]... waiting for server to be ready....Starting Neo4j Server...WARNING: not changing user\n", "process [12640]... waiting for server to be ready.....Starting Neo4j Server...WARNING: not changing user\n", "process [12883]... waiting for server to be ready..........Starting Neo4j Server...WARNING: not changing user\n", "process [13174]... waiting for server to be ready........................................................ OK.\n", "http://localhost:7500/ is ready.\n", "........... OK.\n", "http://localhost:7503/ is ready.\n", " OK.\n", "http://localhost:7501/ is ready.\n", ".... OK.\n", "http://localhost:7502/ is ready.\n", ".. OK.\n", "http://localhost:7504/ is ready.\n", " OK.\n", "http://localhost:7505/ is ready.\n", "Stopping Neo4j Server [12406]....... done\n", "Stopping Neo4j Server [12181]........ done\n", "Stopping Neo4j Server [11964]......... done\n", "Stopping Neo4j Server [12640]........... done\n", "Stopping Neo4j Server [13174]....... done\n", "Stopping Neo4j Server [12883]....... 
done\n", "Complete\n" ] } ], "source": [
    "%%script python\n",
    "\n",
    "import re\n",
    "import os\n",
    "import tarfile\n",
    "import shutil\n",
    "import subprocess\n",
    "import concurrent.futures\n",
    "import collections\n",
    "import textwrap\n",
    "\n",
    "import urllib.request\n",
    "import py2neo\n",
    "\n",
    "import hetio.readwrite\n",
    "import hetio.neo4j\n",
    "\n",
    "\n",
    "def replace_text(path, find, repl):\n",
    "    \"\"\"\n",
    "    Read a text file, replace every literal occurrence of find with repl,\n",
    "    and overwrite the file with the modified version.\n",
    "\n",
    "    Both find and repl are plain text: neither is interpreted as a\n",
    "    regular expression or as a regex replacement template. A file that\n",
    "    does not contain find is rewritten unchanged.\n",
    "    \"\"\"\n",
    "    with open(path) as read_file:\n",
    "        text = read_file.read()\n",
    "    # str.replace keeps backslashes in repl literal, whereas re.sub would\n",
    "    # parse them as escapes or group references.\n",
    "    text = text.replace(find, repl)\n",
    "    with open(path, 'wt') as write_file:\n",
    "        write_file.write(text)\n",
    "\n",
    "def append_text(path, text):\n",
    "    \"\"\"Append text to a file, preceded by a newline\"\"\"\n",
    "    with open(path, 'at') as write_file:\n",
    "        write_file.write('\\n')\n",
    "        write_file.write(text)\n",
    "\n",
    "def create_instance(version, db_id, port=7474, overwrite=False):\n",
    "    \"\"\"\n",
    "    Create a configured neo4j instance below the local 'neo4j' directory.\n",
    "\n",
    "    The release archive for version is downloaded unless it is already\n",
    "    cached, extracted, and renamed to 'neo4j/{version}_{db_id}'. The\n",
    "    configuration files are then edited to disable authentication and\n",
    "    HTTPS, serve the web interface on port, stop keeping old logical logs,\n",
    "    and checkpoint less often.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    version : str\n",
    "        Release name, such as 'neo4j-community-2.3.3'.\n",
    "    db_id : str\n",
    "        Suffix that identifies this instance in the directory name.\n",
    "    port : int\n",
    "        HTTP port written to neo4j-server.properties.\n",
    "    overwrite : bool\n",
    "        Whether to delete an existing instance directory of the same name.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    str\n",
    "        Path of the instance directory.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    FileExistsError\n",
    "        If the instance directory exists and overwrite is False.\n",
    "    \"\"\"\n",
    "    root = 'neo4j'\n",
    "    directory = os.path.join(root, '{}_{}'.format(version, db_id))\n",
    "\n",
    "    # Resolve a name clash before downloading or extracting anything\n",
    "    if os.path.isdir(directory):\n",
    "        if not overwrite:\n",
    "            raise FileExistsError(\n",
    "                '{} already exists and overwrite is False'.format(directory))\n",
    "        shutil.rmtree(directory)\n",
    "\n",
    "    # Download neo4j\n",
    "    os.makedirs(root, exist_ok=True)\n",
    "    filename = '{}-unix.tar.gz'.format(version)\n",
    "    path = os.path.join(root, filename)\n",
    "    if not os.path.exists(path):\n",
    "        url = 'http://neo4j.com/artifact.php?name={}'.format(filename)\n",
    "        urllib.request.urlretrieve(url, path)\n",
    "\n",
    "    # Extract the archive, then give the extracted directory its final name\n",
    "    with tarfile.open(path, 'r:gz') as tar_file:\n",
    "        tar_file.extractall(root)\n",
    "    os.rename(os.path.join(root, version), directory)\n",
    "\n",
    "    # Modify neo4j-server.properties\n",
    "    path = os.path.join(directory, 'conf', 'neo4j-server.properties')\n",
    "    # disable auth to access Neo4j\n",
    "    replace_text(path, 'dbms.security.auth_enabled=true', 'dbms.security.auth_enabled=false')\n",
    "    replace_text(path,\n",
    "        'org.neo4j.server.webserver.port=7474',\n",
    "        'org.neo4j.server.webserver.port={}'.format(port))\n",
    "    replace_text(path,\n",
    "        'org.neo4j.server.webserver.https.enabled=true',\n",
    "        'org.neo4j.server.webserver.https.enabled=false')\n",
    "\n",
    "    # Modify neo4j.properties\n",
    "    path = os.path.join(directory, 'conf', 'neo4j.properties')\n",
    "    # keep only the most recent non-empty log\n",
    "    # http://neo4j.com/docs/stable/configuration-logical-logs.html\n",
    "    replace_text(path, '#keep_logical_logs=7 days', 'keep_logical_logs=false')\n",
    "\n",
    "    append_text(path, '\\n')\n",
    "    text = textwrap.dedent('''\\\n",
    "    # Decrease checkpointing.\n",
    "    # See https://github.com/neo4j/neo4j/issues/6787#issuecomment-202808178\n",
    "    dbms.checkpoint.interval.time=180m\n",
    "    dbms.checkpoint.interval.tx=10000000\n",
    "    ''')\n",
    "    append_text(path, text)\n",
    "\n",
    "    return directory\n",
    "\n",
    "def hetnet_to_neo4j(path, neo4j_dir, port, database_path='data/graph.db'):\n",
    "    \"\"\"\n",
    "    Read a hetnet from file and import it into a new neo4j instance.\n",
    "\n",
    "    The server in neo4j_dir is started, the hetnet stored at path is\n",
    "    exported to it through the REST endpoint on port, and the server is\n",
    "    stopped again whether or not the export succeeded. After a successful\n",
    "    export the logs below database_path are deleted.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    RuntimeError\n",
    "        If reading or exporting the hetnet fails. The original exception is\n",
    "        printed and chained as the cause.\n",
    "    \"\"\"\n",
    "    neo4j_bin = os.path.join(neo4j_dir, 'bin', 'neo4j')\n",
    "    subprocess.run([neo4j_bin, 'start'])\n",
    "    try:\n",
    "        graph = hetio.readwrite.read_graph(path)\n",
    "        uri = 'http://localhost:{}/db/data/'.format(port)\n",
    "        # NOTE(review): 1000 and 250 are presumably the node and edge batch\n",
    "        # sizes -- confirm against hetio.neo4j.export_neo4j\n",
    "        hetio.neo4j.export_neo4j(graph, uri, 1000, 250)\n",
    "    except Exception as error:\n",
    "        print(neo4j_dir, error)\n",
    "        # Re-raise so that the Future wrapping this call records the failure.\n",
    "        # A plain RuntimeError is used because it can always be pickled back\n",
    "        # to the parent process, unlike arbitrary third-party exceptions.\n",
    "        raise RuntimeError('{}: {!r}'.format(neo4j_dir, error)) from error\n",
    "    finally:\n",
    "        subprocess.run([neo4j_bin, 'stop'])\n",
    "    # Only reached after a successful export, with the server stopped\n",
    "    database_dir = os.path.join(neo4j_dir, database_path)\n",
    "    remove_logs(database_dir)\n",
    "\n",
    "def remove_logs(database_dir):\n",
    "    \"\"\"\n",
    "    Delete the transaction and message logs in database_dir and return\n",
    "    the list of removed filenames.\n",
    "\n",
    "    Should only run when server is shutdown.\n",
    "    \"\"\"\n",
    "    prefixes = ('neostore.transaction.db', 'messages.log')\n",
    "    removed = list()\n",
    "    for filename in os.listdir(database_dir):\n",
    "        if filename.startswith(prefixes):\n",
    "            os.remove(os.path.join(database_dir, filename))\n",
    "            removed.append(filename)\n",
    "    return removed\n",
    "\n",
    "def natural_sort_key(text):\n",
    "    \"\"\"\n",
    "    Sort key that compares embedded integers numerically, so that\n",
    "    'hetnet_perm-2' sorts before 'hetnet_perm-10'.\n",
    "    \"\"\"\n",
    "    # Splitting on a capturing group alternates text (even positions)\n",
    "    # with digit runs (odd positions); only the digit runs become ints.\n",
    "    parts = re.split(r'(\\d+)', text)\n",
    "    parts[1::2] = [int(digits) for digits in parts[1::2]]\n",
    "    return parts\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    # Options\n",
    "    neo4j_version = 'neo4j-community-2.3.3'\n",
    "    db_name = 'rephetio-v2.0'\n",
    "    port_0 = 7500\n",
    "    max_workers = 6\n",
    "\n",
    "    # Identify permuted network files. Numeric ordering keeps the database\n",
    "    # numbered i paired with the i-th permutation beyond nine files.\n",
    "    permuted_filenames = sorted(\n",
    "        (x for x in os.listdir('data/permuted') if 'hetnet_perm' in x),\n",
    "        key=natural_sort_key)\n",
    "    print('Permuted filenames:', permuted_filenames)\n",
    "\n",
    "    port_to_future = collections.OrderedDict()\n",
    "\n",
    "    # Initiate Pool. Leaving the with block waits for every import to finish.\n",
    "    with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as pool:\n",
    "\n",
    "        # Export unpermuted network to neo4j\n",
    "        neo4j_dir = create_instance(neo4j_version, db_name, port_0, overwrite=True)\n",
    "        future = pool.submit(hetnet_to_neo4j, path='data/hetnet.json.bz2', neo4j_dir=neo4j_dir, port=port_0)\n",
    "        port_to_future[port_0] = future\n",
    "\n",
    "        # Export permuted network to neo4j\n",
    "        for i, filename in enumerate(permuted_filenames, start=1):\n",
    "            port = port_0 + i\n",
    "            db_id = '{}_perm-{}'.format(db_name, i)\n",
    "            neo4j_dir = create_instance(neo4j_version, db_id, port, overwrite=True)\n",
    "            path = os.path.join('data', 'permuted', filename)\n",
    "            future = pool.submit(hetnet_to_neo4j, path=path, neo4j_dir=neo4j_dir, port=port)\n",
    "            port_to_future[port] = future\n",
    "\n",
    "    print('Complete')\n",
    "\n",
    "    # Print Exceptions\n",
    "    for port, future in port_to_future.items():\n",
    "        exception = future.exception()\n",
    "        if exception is None:\n",
    "            continue\n",
    "        print('\\nERROR: Exception importing on port {}:'.format(port))\n",
    "        print(exception)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}