{
 "metadata": {
  "name": "",
  "signature": "sha256:bf5b2b2ae092f5b0d42f78e4d007f9759fc94adec84efc16660a7ad5bd91baab"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "heading",
     "level": 1,
     "metadata": {},
     "source": [
      "Graph Analysis - II"
     ]
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Imports"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "import pandas as pd\n",
      "import matplotlib.pyplot as plt\n",
      "import seaborn as sns\n",
      "from sklearn.preprocessing import MinMaxScaler\n",
      "import networkx as nx\n",
      "%matplotlib inline"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "heading",
     "level": 2,
     "metadata": {},
     "source": [
      "Centrality measures for the nodes "
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Gk=nx.karate_club_graph()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "https://networkx.github.io/documentation/latest/reference/algorithms.centrality.html\n",
      "    \n",
      "https://networkx.github.io/documentation/latest/reference/generated/networkx.algorithms.link_analysis.pagerank_alg.pagerank.html"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "We start by computing the different centrality measures for our graph."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "degree_c = nx.degree_centrality(Gk)\n",
      "pagerank_c = nx.pagerank(Gk)\n",
      "eigenvector_c = nx.eigenvector_centrality(Gk)\n",
      "betweenness_c = nx.betweenness_centrality(Gk)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "n = len(Gk.nodes())\n",
      "deg = np.zeros(n)\n",
      "pr = np.zeros(n)\n",
      "eig = np.zeros(n)\n",
      "bw = np.zeros(n)\n",
      "i=0\n",
      "for node in Gk:\n",
      "    deg[i] = degree_c[node]\n",
      "    pr[i] = pagerank_c[node]\n",
      "    eig[i] = eigenvector_c[node]\n",
      "    bw[i] = betweenness_c[node]\n",
      "    i+=1\n",
      "    \n",
      "measures = pd.DataFrame()\n",
      "measures['nodes'] = Gk.nodes()\n",
      "measures.set_index(['nodes'], inplace=True)\n",
      "measures['eigenvector_c'] = pd.DataFrame.from_dict(eigenvector_c, orient='index')\n",
      "measures['pagerank_c'] = pd.DataFrame.from_dict(pagerank_c, orient='index')\n",
      "measures['degree_c'] = pd.DataFrame.from_dict(degree_c, orient='index')\n",
      "measures['betweenness_c'] = pd.DataFrame.from_dict(betweenness_c, orient='index')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "We can plot the correlation of the different centralities. Notice the strong positive correlation between the degree centrality and the pagerank centrality."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "sns.corrplot(measures)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "We can also do a scatterplot for all the different pairs of centrality measures and try to see if there are any strong trends."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "with sns.axes_style('white'):\n",
      "    sns.pairplot(measures)    "
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "plt.scatter(deg,pr)\n",
      "plt.show()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "plt.scatter(deg,bw)\n",
      "plt.show()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "When plotting the graph, we can choose to represent the centrality of each node as its size. "
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# plotting the graph \n",
      "scaler = MinMaxScaler((50,800))\n",
      "eig_scaled = scaler.fit_transform(eig)\n",
      "node_size = eig_scaled\n",
      "nx.draw(Gk, node_size=node_size, node_color='#6699cc')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Let's see how the above applies to directed graphs."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "G = nx.read_gml('celegansneural.gml')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print len(G.nodes()), len(G.edges())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "print nx.is_strongly_connected(G)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "If the graph is **not** strongly connected, we can keep its largest strongly connected component."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "scc = nx.strongly_connected_component_subgraphs(G)\n",
      "sizemax = 0\n",
      "Gmax = G\n",
      "for g in scc:\n",
      "    if len(g.nodes())>sizemax:\n",
      "        Gmax = g\n",
      "        sizemax = len(Gmax.nodes())\n",
      "print len(Gmax.nodes())"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "Gmax = nx.DiGraph(Gmax)\n",
      "degree_c = nx.degree_centrality(Gmax)\n",
      "pagerank_c = nx.pagerank(Gmax)\n",
      "eigenvector_c = nx.eigenvector_centrality(Gmax)\n",
      "betweenness_c = nx.betweenness_centrality(Gmax)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "n = len(Gmax.nodes())\n",
      "deg = np.zeros(n)\n",
      "pr = np.zeros(n)\n",
      "eig = np.zeros(n)\n",
      "bw = np.zeros(n)\n",
      "i=0\n",
      "for node in Gmax:\n",
      "    deg[i] = degree_c[node]\n",
      "    pr[i] = pagerank_c[node]\n",
      "    eig[i] = eigenvector_c[node]\n",
      "    bw[i] = betweenness_c[node]\n",
      "    i+=1\n",
      "measures = pd.DataFrame()\n",
      "measures['nodes'] = Gmax.nodes()\n",
      "measures.set_index(['nodes'], inplace=True)\n",
      "measures['eigenvector_c'] = pd.DataFrame.from_dict(eigenvector_c, orient='index')\n",
      "measures['pagerank_c'] = pd.DataFrame.from_dict(pagerank_c, orient='index')\n",
      "measures['degree_c'] = pd.DataFrame.from_dict(degree_c, orient='index')\n",
      "measures['betweenness_c'] = pd.DataFrame.from_dict(betweenness_c, orient='index')"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "sns.corrplot(measures)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "with sns.axes_style('white'):\n",
      "    sns.pairplot(measures)    "
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Code for setting the style of the notebook\n",
      "from IPython.core.display import HTML\n",
      "def css_styling():\n",
      "    styles = open(\"../theme/custom.css\", \"r\").read()\n",
      "    return HTML(styles)\n",
      "css_styling()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "html": [
        "<link href='http://fonts.googleapis.com/css?family=EB+Garamond' rel='stylesheet' type='text/css'>\n",
        "<link href='http://fonts.googleapis.com/css?family=Alegreya+Sans:100,300,400,500,700,800,900,100italic,300italic,400italic,500italic,700italic,800italic,900italic' rel='stylesheet' type='text/css'>\n",
        "<link href='http://fonts.googleapis.com/css?family=Source+Code+Pro:300,400' rel='stylesheet' type='text/css'>\n",
        "<style>\n",
        "    @font-face {\n",
        "        font-family: \"Computer Modern\";\n",
        "        src: url('http://mirrors.ctan.org/fonts/cm-unicode/fonts/otf/cmunss.otf');\n",
        "    }\n",
        "    .code_cell {\n",
        "        width: 105ex !important ;\n",
        "        margin-bottom: 15px !important;\n",
        "    }\n",
        "    div.cell {\n",
        "        margin-left: auto;\n",
        "        margin-right: auto;\n",
        "        width: 70%;\n",
        "    }    \n",
        "    div.cell.selected {\n",
        "        border: thin rgba(171, 171, 171, 0.5) dashed;\n",
        "    }\n",
        "    h1 {\n",
        "        font-family: 'Alegreya Sans', sans-serif;\n",
        "    }\n",
        "    h2 {\n",
        "        font-family: 'EB Garamond', serif;\n",
        "    }\n",
        "    h3 {\n",
        "        font-family: 'EB Garamond', serif;\n",
        "        margin-top:12px;\n",
        "        margin-bottom: 3px;\n",
        "    }\n",
        "    h4 {\n",
        "        font-family: 'EB Garamond', serif;\n",
        "    }\n",
        "    h5 {\n",
        "        font-family: 'Alegreya Sans', sans-serif;\n",
        "    }\n",
        "    div.text_cell_render {\n",
        "        font-family: 'EB Garamond',Computer Modern, \"Helvetica Neue\", Arial, Helvetica, Geneva, sans-serif;\n",
        "        line-height: 145%;\n",
        "        font-size: 140%;\n",
        "    }\n",
        "    div.input_area {\n",
        "        border-color: rgba(0,0,0,0.10) !important;\n",
        "        background: #fafafa;\n",
        "    }\n",
        "    .CodeMirror {\n",
        "            font-family: \"Source Code Pro\";\n",
        "            font-size: 90%;\n",
        "    }\n",
        "    .prompt {\n",
        "        display: None;\n",
        "    }\n",
        "    .output {\n",
        "        padding-left: 50px;\n",
        "        padding-top: 5px;\n",
        "    }\n",
        "    .output_wrapper {\n",
        "        padding-left: 5px;\n",
        "        padding-top: inherit;\n",
        "    }\n",
        "    div.output_scroll {\n",
        "        width: inherit;\n",
        "    }\n",
        "    .inner_cell {\n",
        "        padding-left: 5px;\n",
        "    }\n",
        "    .text_cell_render h1 {\n",
        "        font-weight: 200;\n",
        "        font-size: 50pt;\n",
        "        line-height: 100%;\n",
        "        color:#CD2305;\n",
        "        margin-bottom: 0.5em;\n",
        "        margin-top: 0.5em;\n",
        "        display: block;\n",
        "    }\n",
        "    .text_cell_render h5 {\n",
        "        font-weight: 300;\n",
        "        font-size: 16pt;\n",
        "        color: #CD2305;\n",
        "        font-style: italic;\n",
        "        margin-bottom: .5em;\n",
        "        margin-top: 0.5em;\n",
        "        display: block;\n",
        "    }\n",
        "    .warning {\n",
        "        color: rgb( 240, 20, 20 )\n",
        "        }  \n",
        "</style>\n",
        "<script>\n",
        "    MathJax.Hub.Config({\n",
        "                        TeX: {\n",
        "                           extensions: [\"AMSmath.js\"]\n",
        "                           },\n",
        "                tex2jax: {\n",
        "                    inlineMath: [ ['$','$'], [\"\\\\(\",\"\\\\)\"] ],\n",
        "                    displayMath: [ ['$$','$$'], [\"\\\\[\",\"\\\\]\"] ]\n",
        "                },\n",
        "                displayAlign: 'center', // Change this to 'center' to center equations.\n",
        "                \"HTML-CSS\": {\n",
        "                    styles: {'.MathJax_Display': {\"margin\": 4}}\n",
        "                }\n",
        "        });\n",
        "</script>"
       ],
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 20,
       "text": [
        "<IPython.core.display.HTML at 0x2aefd208>"
       ]
      }
     ],
     "prompt_number": 20
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}