{
"metadata": {
"name": "",
"signature": "sha256:bf5b2b2ae092f5b0d42f78e4d007f9759fc94adec84efc16660a7ad5bd91baab"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "heading",
"level": 1,
"metadata": {},
"source": [
"Graph Analysis - II"
]
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Imports"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.preprocessing import MinMaxScaler\n",
"import networkx as nx\n",
"%matplotlib inline"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "heading",
"level": 2,
"metadata": {},
"source": [
"Centrality measures for the nodes "
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Build the karate club example graph that ships with NetworkX.\n",
"Gk = nx.karate_club_graph()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"https://networkx.github.io/documentation/latest/reference/algorithms.centrality.html\n",
" \n",
"https://networkx.github.io/documentation/latest/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We start by computing the different centrality measures for our graph."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Score every node of Gk with four centrality measures.\n",
"# Each result is looked up by node further down, so it is kept as returned.\n",
"betweenness_c = nx.betweenness_centrality(Gk)\n",
"eigenvector_c = nx.eigenvector_centrality(Gk)\n",
"pagerank_c = nx.pagerank(Gk)\n",
"degree_c = nx.degree_centrality(Gk)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Fix one node order up front so the arrays and the table index all follow\n",
"# Gk's iteration order (the order nx.draw uses by default for node_size).\n",
"nodes = list(Gk)\n",
"n = len(nodes)\n",
"deg = np.array([degree_c[v] for v in nodes], dtype=float)\n",
"pr = np.array([pagerank_c[v] for v in nodes], dtype=float)\n",
"eig = np.array([eigenvector_c[v] for v in nodes], dtype=float)\n",
"bw = np.array([betweenness_c[v] for v in nodes], dtype=float)\n",
"\n",
"# One row per node, one column per centrality, built in a single call rather\n",
"# than by mutating an empty frame in place. The column order is pinned\n",
"# explicitly because a plain dict does not guarantee it on older Pythons.\n",
"measures = pd.DataFrame(\n",
"    {'eigenvector_c': eig, 'pagerank_c': pr, 'degree_c': deg, 'betweenness_c': bw},\n",
"    index=pd.Index(nodes, name='nodes'),\n",
"    columns=['eigenvector_c', 'pagerank_c', 'degree_c', 'betweenness_c'],\n",
")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can plot the correlation of the different centralities. Notice the strong positive correlation between the degree centrality and the pagerank centrality."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# sns.corrplot is no longer available in seaborn; draw the pairwise correlation\n",
"# matrix as an annotated heatmap on a fixed [-1, 1] colour scale instead.\n",
"sns.heatmap(measures.corr(), annot=True, vmin=-1, vmax=1)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also draw a scatterplot for every pair of centrality measures and look for strong trends."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Pairwise scatterplots of the centrality columns, drawn with seaborn's 'white' axes style.\n",
"with sns.axes_style('white'):\n",
"    sns.pairplot(measures)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Degree centrality against PageRank, one point per node.\n",
"plt.scatter(deg, pr)\n",
"# Label the axes so the figure can be read on its own.\n",
"plt.xlabel('degree centrality')\n",
"plt.ylabel('PageRank')\n",
"plt.show()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Degree centrality against betweenness centrality, one point per node.\n",
"plt.scatter(deg, bw)\n",
"# Label the axes so the figure can be read on its own.\n",
"plt.xlabel('degree centrality')\n",
"plt.ylabel('betweenness centrality')\n",
"plt.show()"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"When plotting the graph, we can represent the centrality of each node by its size; here the node size follows the eigenvector centrality."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# plotting the graph, with node sizes driven by eigenvector centrality\n",
"scaler = MinMaxScaler((50,800))\n",
"# scikit-learn scalers expect a 2-D (n_samples, n_features) array: pass the\n",
"# scores as a single column, then flatten back to one size per node.\n",
"eig_scaled = scaler.fit_transform(eig.reshape(-1, 1)).ravel()\n",
"node_size = eig_scaled\n",
"nx.draw(Gk, node_size=node_size, node_color='#6699cc')"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's see how the above applies to directed graphs."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Read the graph stored in the GML file that sits next to this notebook.\n",
"gml_path = 'celegansneural.gml'\n",
"G = nx.read_gml(gml_path)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Node and edge counts, formatted so the output is the same on Python 2 and 3.\n",
"print('%d %d' % (G.number_of_nodes(), G.number_of_edges()))"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Single-argument print call: valid on both Python 2 and Python 3.\n",
"print(nx.is_strongly_connected(G))"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If the graph is **not** strongly connected, we can keep its largest strongly connected component."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Node collections of the strongly connected components of G.\n",
"# (strongly_connected_component_subgraphs is gone from current NetworkX.)\n",
"scc = list(nx.strongly_connected_components(G))\n",
"# Keep the largest component as an independent graph; max() returns the first\n",
"# of equally sized components. With no components at all, fall back to G.\n",
"Gmax = G.subgraph(max(scc, key=len)).copy() if scc else G\n",
"sizemax = Gmax.number_of_nodes()\n",
"print(sizemax)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Rebuild the component as a plain DiGraph, then score its nodes with the\n",
"# same four centrality measures (the earlier result names are reused).\n",
"Gmax = nx.DiGraph(Gmax)\n",
"betweenness_c = nx.betweenness_centrality(Gmax)\n",
"eigenvector_c = nx.eigenvector_centrality(Gmax)\n",
"pagerank_c = nx.pagerank(Gmax)\n",
"degree_c = nx.degree_centrality(Gmax)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Fix one node order up front so the arrays and the table index all follow\n",
"# Gmax's iteration order.\n",
"nodes = list(Gmax)\n",
"n = len(nodes)\n",
"deg = np.array([degree_c[v] for v in nodes], dtype=float)\n",
"pr = np.array([pagerank_c[v] for v in nodes], dtype=float)\n",
"eig = np.array([eigenvector_c[v] for v in nodes], dtype=float)\n",
"bw = np.array([betweenness_c[v] for v in nodes], dtype=float)\n",
"\n",
"# One row per node, one column per centrality, built in a single call rather\n",
"# than by mutating an empty frame in place. The column order is pinned\n",
"# explicitly because a plain dict does not guarantee it on older Pythons.\n",
"measures = pd.DataFrame(\n",
"    {'eigenvector_c': eig, 'pagerank_c': pr, 'degree_c': deg, 'betweenness_c': bw},\n",
"    index=pd.Index(nodes, name='nodes'),\n",
"    columns=['eigenvector_c', 'pagerank_c', 'degree_c', 'betweenness_c'],\n",
")"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# sns.corrplot is no longer available in seaborn; draw the pairwise correlation\n",
"# matrix as an annotated heatmap on a fixed [-1, 1] colour scale instead.\n",
"sns.heatmap(measures.corr(), annot=True, vmin=-1, vmax=1)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Pairwise scatterplots of the centrality columns, drawn with seaborn's 'white' axes style.\n",
"with sns.axes_style('white'):\n",
"    sns.pairplot(measures)"
],
"language": "python",
"metadata": {},
"outputs": []
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Code for setting the style of the notebook\n",
"from IPython.core.display import HTML\n",
"def css_styling():\n",
"    \"\"\"Read ../theme/custom.css and return its text wrapped in an IPython HTML object.\"\"\"\n",
"    # The with-block closes the file handle even if read() raises.\n",
"    with open(\"../theme/custom.css\", \"r\") as css_file:\n",
"        styles = css_file.read()\n",
"    return HTML(styles)\n",
"css_styling()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"\n",
"\n",
"\n",
"\n",
""
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 20,
"text": [
""
]
}
],
"prompt_number": 20
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}