{ "metadata": { "name": "", "signature": "sha256:a1c6b95edf0355b40675309517cc9a6c5072ac5593a81a1d21ad5679d35f7344" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "projx with Neo4j and the Flickr group membership graph." ] }, { "cell_type": "code", "collapsed": false, "input": [ "%load_ext cypher" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "import projx as px" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "heading", "level": 3, "metadata": {}, "source": [ "Load the Flickr membership graph from an edge list into a Neo4j database." ] }, { "cell_type": "code", "collapsed": false, "input": [ "# Flickr group-membership bipartite network (source: http://konect.uni-koblenz.de/networks/flickr-groupmemberships ).\n", "# 395,979 users, 103,631 groups, 8,545,307 edges (memberships).\n", "flickergroup_etl = {\n", " \"extractor\": {\n", " \"edgelist\": {\n", " \"filename\": \"data/flickr-groupmemberships/out.flickr-groupmemberships\",\n", " \"delim\": \" \",\n", " \"pattern\": [\n", " {\"node\": {\"alias\": \"n\"}},\n", " {\"edge\": {}},\n", " {\"node\": {\"alias\": \"m\"}}\n", " ]\n", " }\n", " },\n", " \"transformers\": [\n", " {\n", " \"edge\": {\n", " \"pattern\": [\n", " {\"node\": {\"alias\": \"n\", \"label\": \"User\"}},\n", " {\"edge\": {\"label\": \"IN\"}},\n", " {\"node\": {\"alias\": \"m\", \"label\": \"Group\"}}\n", " ]\n", " }\n", " }\n", " ],\n", " \"loader\": {\n", " \"edgelist2neo4j\": {\n", " \"uri\": \"http://localhost:7474/db/data\",\n", " \"stmt_per_req\": 500,\n", " \"req_per_tx\": 25,\n", " \"indicies\": [\n", " {\"label\": \"User\", \"attr\": \"UniqueId\"},\n", " {\"label\": \"Group\", \"attr\": \"UniqueId\"}\n", " ]\n", " }\n", " }\n", "}" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, 
"input": [ "px.execute_etl(flickergroup_etl)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Statements per request: 500\n", "Requests per transactions: 25\n", "Created index: CREATE INDEX ON :User(UniqueId);" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Created index: CREATE INDEX ON :Group(UniqueId);" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Merged 1000000 edges in 0:12:19.132064" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Merged 2000000 edges in 0:23:51.167390" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Merged 3000000 edges in 0:34:52.905085" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Merged 4000000 edges in 0:45:31.490539" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Merged 5000000 edges in 0:56:09.081776" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Merged 6000000 edges in 1:06:26.836744" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Merged 7000000 edges in 1:17:13.032958" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Merged 8000000 edges in 1:27:56.693161" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Load complete: merged 8545000 edges in 1:33:23.530656" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "# This uses ipython-cypher by @versae.\n", "num_users = %cypher match (user:User) return count(user)\n", "num_groups = %cypher match (group:Group) return count(group)\n", "num_rels = %cypher match (user:User)-[rels:IN]->(group:Group) return count(rels)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "1 rows affected.\n", "1 rows affected." 
] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "1 rows affected." ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "print num_users, num_groups, num_rels" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "+-------------+\n", "| count(user) |\n", "+-------------+\n", "| 395979 |\n", "+-------------+ +--------------+\n", "| count(group) |\n", "+--------------+\n", "| 103631 |\n", "+--------------+ +-------------+\n", "| count(rels) |\n", "+-------------+\n", "| 8545307 |\n", "+-------------+\n" ] } ], "prompt_number": 7 } ], "metadata": {} } ] }