{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Importing an edge list into Titan 0.9.0-M1 (Berkeley DB) w/TP3 Gremlin Server 3.0.0-M7 and Python"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "from gizmo import AsyncGremlinClient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def _submit(gc, script, **task_kwargs):\n",
    "    \"\"\"Create a task with ``gc.s(script, **task_kwargs)`` and execute it.\"\"\"\n",
    "    task = gc.s(script, **task_kwargs)\n",
    "    task.execute()\n",
    "\n",
    "\n",
    "def _report_commit(response):\n",
    "    \"\"\"Consumer callback: print the ``status_code`` of ``response``.\"\"\"\n",
    "    print(\"Commited tx with response code: {}\".format(response.status_code))\n",
    "\n",
    "\n",
    "def _show(result):\n",
    "    \"\"\"Consumer callback: print ``result`` unchanged.\"\"\"\n",
    "    print(result)\n",
    "\n",
    "\n",
    "def build_schema():\n",
    "    \"\"\"Submit the schema-definition script through a new client.\n",
    "\n",
    "    The script opens a management transaction, creates an Integer\n",
    "    ``uniqueId`` property key, a unique composite vertex index ``byId``\n",
    "    on that key and the ``collabs`` edge label, then commits.\n",
    "    \"\"\"\n",
    "    script = \"\"\"\n",
    " mgmt = g.openManagement();\n",
    " uniqueId = mgmt.makePropertyKey('uniqueId').dataType(Integer.class).make();\n",
    " mgmt.buildIndex('byId', Vertex.class).addKey(uniqueId).unique().buildCompositeIndex();\n",
    " mgmt.makeEdgeLabel('collabs').make();\n",
    " mgmt.commit();\"\"\"\n",
    "    _submit(AsyncGremlinClient(), script, consumer=_report_commit)\n",
    "\n",
    "\n",
    "def load_edges():\n",
    "    \"\"\"Submit the edge-list import script and print how long it took.\n",
    "\n",
    "    The script reads ``social_net.txt`` line by line, splits each line on\n",
    "    a tab, maps both fields to a vertex (looked up by ``uniqueId`` or\n",
    "    created), adds a ``collabs`` edge between them, and commits once at\n",
    "    the end. The file is opened by the script itself, so the path is\n",
    "    resolved wherever the script is evaluated, not by this notebook.\n",
    "    \"\"\"\n",
    "    start = datetime.now()\n",
    "    script = \"\"\" \n",
    " getOrCreate = { id ->\n",
    " def n = g.V().has('uniqueId', id)\n",
    " if (n.hasNext()) {n.next()} else {g.addVertex(\"uniqueId\", id)}\n",
    " }\n",
    "\n",
    " new File('social_net.txt').eachLine { \n",
    " (source, target) = it.split('\\t').collect(getOrCreate)\n",
    " source.addEdge('collabs', target)\n",
    " }\n",
    "\n",
    " g.tx().commit()\"\"\"\n",
    "    _submit(AsyncGremlinClient(), script, consumer=_report_commit)\n",
    "    print(\"Loaded in {}\".format(datetime.now() - start))\n",
    "\n",
    "\n",
    "def count_nodes(gc):\n",
    "    \"\"\"Submit ``g.V().count()`` through ``gc`` and print what comes back.\"\"\"\n",
    "    _submit(gc, \"g.V().count()\", collect=False, consumer=_show)\n",
    "\n",
    "\n",
    "def count_edges(gc):\n",
    "    \"\"\"Submit ``g.E().count()`` through ``gc`` and print what comes back.\"\"\"\n",
    "    _submit(gc, \"g.E().count()\", collect=False, consumer=_show)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Commited tx with response code: 200\n"
     ]
    }
   ],
   "source": [
    "build_schema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Commited tx with response code: 200\n",
      "Loaded in 0:00:17.949163\n"
     ]
    }
   ],
   "source": [
    "load_edges()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "gc = AsyncGremlinClient()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[16264]\n"
     ]
    }
   ],
   "source": [
    "count_nodes(gc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[47594]\n"
     ]
    }
   ],
   "source": [
    "count_edges(gc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2651.62404590785 edges per second\n"
     ]
    }
   ],
   "source": [
    "# Figures copied from the count_edges() and load_edges() outputs above.\n",
    "EDGES_LOADED = 47594\n",
    "LOAD_SECONDS = 17.949\n",
    "print(\"{} edges per second\".format(EDGES_LOADED / LOAD_SECONDS))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}