{ "metadata": { "name": "", "signature": "sha256:bf5b2b2ae092f5b0d42f78e4d007f9759fc94adec84efc16660a7ad5bd91baab" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 1, "metadata": {}, "source": [ "Graph Analysis - II" ] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Imports" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from sklearn.preprocessing import MinMaxScaler\n", "import networkx as nx\n", "%matplotlib inline" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Centrality measures for the nodes " ] }, { "cell_type": "code", "collapsed": false, "input": [ "Gk=nx.karate_club_graph()" ], "language": "python", "metadata": {}, "outputs": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "https://networkx.github.io/documentation/latest/reference/algorithms.centrality.html\n", " \n", "https://networkx.github.io/documentation/latest/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We start by computing the different centrality measures for our graph." 
] },
{ "cell_type": "code", "collapsed": false, "input": [
"degree_c = nx.degree_centrality(Gk)\n",
"pagerank_c = nx.pagerank(Gk)\n",
"eigenvector_c = nx.eigenvector_centrality(Gk)\n",
"betweenness_c = nx.betweenness_centrality(Gk)"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [
"# Freeze one node ordering so the arrays and the table index stay aligned.\n",
"nodes = list(Gk.nodes())\n",
"n = len(nodes)\n",
"deg = np.array([degree_c[node] for node in nodes], dtype=float)\n",
"pr = np.array([pagerank_c[node] for node in nodes], dtype=float)\n",
"eig = np.array([eigenvector_c[node] for node in nodes], dtype=float)\n",
"bw = np.array([betweenness_c[node] for node in nodes], dtype=float)\n",
"\n",
"# One row per node, one column per centrality measure.\n",
"measures = pd.DataFrame(\n",
"    {'eigenvector_c': eig, 'pagerank_c': pr, 'degree_c': deg, 'betweenness_c': bw},\n",
"    index=pd.Index(nodes, name='nodes'),\n",
"    columns=['eigenvector_c', 'pagerank_c', 'degree_c', 'betweenness_c'])"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "We can plot the correlation of the different centralities. Notice the strong positive correlation between the degree centrality and the pagerank centrality." ] },
{ "cell_type": "code", "collapsed": false, "input": [
"# sns.corrplot is not available in current seaborn releases;\n",
"# a heatmap of the correlation matrix shows the same pairwise correlations.\n",
"sns.heatmap(measures.corr(), annot=True)"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "We can also do a scatterplot for all the different pairs of centrality measures and try to see if there are any strong trends."
] },
{ "cell_type": "code", "collapsed": false, "input": [
"with sns.axes_style('white'):\n",
"    sns.pairplot(measures)"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [
"plt.scatter(deg, pr)\n",
"plt.xlabel('degree centrality')\n",
"plt.ylabel('PageRank')\n",
"plt.show()"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [
"plt.scatter(deg, bw)\n",
"plt.xlabel('degree centrality')\n",
"plt.ylabel('betweenness centrality')\n",
"plt.show()"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "When plotting the graph, we can choose to represent the centrality of each node as its size." ] },
{ "cell_type": "code", "collapsed": false, "input": [
"# Rescale eigenvector centrality to node sizes between 50 and 800.\n",
"# MinMaxScaler expects a 2-D (n_samples, n_features) array, so the values are\n",
"# scaled as a single column and flattened back to one size per node.\n",
"scaler = MinMaxScaler((50, 800))\n",
"eig_scaled = scaler.fit_transform(eig.reshape(-1, 1)).ravel()\n",
"node_size = eig_scaled\n",
"nx.draw(Gk, node_size=node_size, node_color='#6699cc')"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Let's see how the above applies to directed graphs." ] },
{ "cell_type": "code", "collapsed": false, "input": [
"G = nx.read_gml('celegansneural.gml')"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [
"# Single-argument print() gives the same text on Python 2 and Python 3.\n",
"print('%d %d' % (G.number_of_nodes(), G.number_of_edges()))"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [
"print(nx.is_strongly_connected(G))"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "If the graph is **not** strongly connected, we can keep its largest strongly connected component."
] },
{ "cell_type": "code", "collapsed": false, "input": [
"# Keep the largest strongly connected component (G itself if it has no nodes).\n",
"# strongly_connected_component_subgraphs is gone from recent networkx, so the\n",
"# subgraph is built from the node set of the largest component instead.\n",
"components = list(nx.strongly_connected_components(G))\n",
"Gmax = G.subgraph(max(components, key=len)).copy() if components else G\n",
"sizemax = Gmax.number_of_nodes()\n",
"print(sizemax)"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [
"Gmax = nx.DiGraph(Gmax)\n",
"degree_c = nx.degree_centrality(Gmax)\n",
"pagerank_c = nx.pagerank(Gmax)\n",
"eigenvector_c = nx.eigenvector_centrality(Gmax)\n",
"betweenness_c = nx.betweenness_centrality(Gmax)"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [
"# Freeze one node ordering so the arrays and the table index stay aligned.\n",
"nodes = list(Gmax.nodes())\n",
"n = len(nodes)\n",
"deg = np.array([degree_c[node] for node in nodes], dtype=float)\n",
"pr = np.array([pagerank_c[node] for node in nodes], dtype=float)\n",
"eig = np.array([eigenvector_c[node] for node in nodes], dtype=float)\n",
"bw = np.array([betweenness_c[node] for node in nodes], dtype=float)\n",
"\n",
"# One row per node, one column per centrality measure.\n",
"measures = pd.DataFrame(\n",
"    {'eigenvector_c': eig, 'pagerank_c': pr, 'degree_c': deg, 'betweenness_c': bw},\n",
"    index=pd.Index(nodes, name='nodes'),\n",
"    columns=['eigenvector_c', 'pagerank_c', 'degree_c', 'betweenness_c'])"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [
"# sns.corrplot is not available in current seaborn releases;\n",
"# a heatmap of the correlation matrix shows the same pairwise correlations.\n",
"sns.heatmap(measures.corr(), annot=True)"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [
"with sns.axes_style('white'):\n",
"    sns.pairplot(measures)"
], "language": "python", "metadata": {}, "outputs": [] },
{ "cell_type": "code", "collapsed": false, "input": [
"# Code for setting the style of the notebook\n",
"from IPython.core.display import HTML\n",
"\n",
"def css_styling():\n",
"    \"\"\"Read ../theme/custom.css and return its contents wrapped in an HTML object.\"\"\"\n",
"    # The context manager closes the file handle even if reading fails.\n",
"    with open(\"../theme/custom.css\", \"r\") as css_file:\n",
"        styles = css_file.read()\n",
"    return HTML(styles)\n",
"\n",
"css_styling()"
], "language": "python", "metadata": {}, "outputs": [ { "html": [ "\n", "\n", "\n", "\n", "" ], "metadata": {}, "output_type": "pyout", "prompt_number": 20, "text": [ "" ] } ], "prompt_number": 20 },
{ "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] }
], "metadata": {} } ] }