{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9130726e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# On Colab, you can uncomment the line below to install the required packages.\n",
    "# !pip install networkx pandas numpy matplotlib seaborn scipy\n",
    "\n",
    "# Clone the repo (to get access to the module)\n",
    "#!git clone https://github.com/jgarciab/NetworkScience.git\n",
    "#import sys\n",
    "#sys.path.append('/content/NetworkScience/Practicals/day1/')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb8701ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# on your computer\n",
    "path_data = \"../../Data/\"\n",
    "\n",
    "# on colab uncomment the line below\n",
    "# path_data = \"/content/NetworkScience/Data/\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0e5089da",
   "metadata": {},
   "source": [
    "# Matrix Multiplication and Centrality in Networks\n",
    "\n",
    "Welcome to the afternoon practical! In this session, you'll learn how to represent networks as matrices, perform matrix operations, and compute node centrality measures.\n",
    "\n",
    "**Learning objectives:**\n",
    "- Understand different ways to represent a network (adjacency matrix, edgelist, etc.)\n",
    "- Practice converting between representations\n",
    "- Use matrix multiplication to explore paths and clustering\n",
    "- Calculate and interpret centrality measures\n",
    "\n",
    "Each section contains explanations, code, and exercises. Read the explanations and comments before running the code."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ddeeaed1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import network analysis libraries\n",
    "import networkx as nx  # For network analysis\n",
    "\n",
    "# Import data processing libraries\n",
    "import pandas as pd    # For data manipulation\n",
    "import numpy as np     # For numerical operations\n",
    "\n",
    "# Import utility functions from local file\n",
    "import sys\n",
    "sys.path.append(\"./\")\n",
    "from common_functions import *  # Custom helper functions\n",
    "\n",
    "# Import visualization libraries\n",
    "import pylab as plt\n",
    "import matplotlib as mpl\n",
    "import seaborn as sns\n",
    "\n",
    "# Set custom visualization parameters for better aesthetics\n",
    "custom_params = {\n",
    "    'axes.spines.right': False, 'axes.spines.top': False, 'axes.spines.left': False, 'axes.spines.bottom': False,\n",
    "    'lines.linewidth': 2, 'grid.color': 'lightgray', 'legend.frameon': False,\n",
    "    'xtick.labelcolor': '#484848', 'ytick.labelcolor': '#484848',\n",
    "    'xtick.color': '#484848', 'ytick.color': '#484848',\n",
    "    'text.color': '#484848', 'axes.labelcolor': '#484848',\n",
    "    'axes.titlecolor':'#484848', 'figure.figsize': [5,3],\n",
    "    'axes.titlelocation':'left',\n",
    "    'xaxis.labellocation':'left',\n",
    "    'yaxis.labellocation':'bottom'\n",
    "}\n",
    "# Define a color palette for plots\n",
    "palette = ['#3d348b','#e6af2e','#191716','#e0e2db']  # Custom color palette\n",
    "sns.set_theme(context='paper', style='white', palette=palette, font_scale=1.3, color_codes=True, rc=custom_params)\n",
    "\n",
    "# Widen the notebook display for better plot visibility\n",
    "from IPython.display import display, HTML\n",
    "display(HTML(\"<style>.container { width:90% !important; }</style>\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5a40cd5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Helper Functions for Visualization and Conversion ---\n",
    "# These functions help plot networks, adjacency matrices, and convert between representations.\n",
    "\n",
    "def plot_network(G, a0 = None, values = None):\n",
    "    \"\"\"\n",
    "    Plots network with nice defaults.\n",
    "    Node color and size can be set by 'values' (e.g., centrality).\n",
    "    \"\"\"\n",
    "    if values is None:\n",
    "        values = nx.degree_centrality(G).values()\n",
    "    if a0 is None:\n",
    "        fig, a0 = plt.subplots()\n",
    "    norm = mpl.colors.Normalize(vmin=min(values), vmax=max(values), clip=True)\n",
    "    mapper = mpl.cm.ScalarMappable(norm=norm, cmap=mpl.cm.coolwarm)\n",
    "    mapper._A = []\n",
    "    cb = plt.colorbar(mapper, ax = a0, location=\"bottom\", shrink=0.8, pad = 0.02, label = \"Value\")\n",
    "    cb.outline.set_visible(False)\n",
    "\n",
    "    # Draw network layout\n",
    "    if nx.is_bipartite(G):\n",
    "        top = [_ for _ in G.nodes() if _[0] != \"S\"]\n",
    "        pos = nx.bipartite_layout(G, top)\n",
    "        node_color = [\"#e6af2e\" if node in top else \"#e0e2db\" for node in G]\n",
    "    else:\n",
    "        pos = nx.spring_layout(G, seed = 1)\n",
    "        node_color = [mapper.to_rgba(i) for i in values]\n",
    "\n",
    "    nx.draw(G, pos = pos, with_labels = True, node_size=500*np.array(list(values)), edge_color = \"darkgray\", \n",
    "           node_color = node_color, ax = a0)\n",
    "    \n",
    "\n",
    "def plot_network_adj(G, values = None):\n",
    "    \"\"\"\n",
    "    Plots the graph (with color/node size adjusted according to values) and the adjacency matrix.\n",
    "    Shows both the network and its matrix representation side by side.\n",
    "    \"\"\"\n",
    "    f, (a0, a1, a2) = plt.subplots(1, 3, gridspec_kw={'width_ratios': [1, 1, 0.5]}, figsize=(12,4))\n",
    "    \n",
    "    # Plot network\n",
    "    plot_network(G, a0, values = values)\n",
    "    \n",
    "    # Show adjacency table\n",
    "    df = nx.to_pandas_adjacency(G, nodelist=list(G.nodes()), dtype=int)\n",
    "    plot_table(a1, df.values, df.index, df.columns)\n",
    "\n",
    "    A = nx.to_scipy_sparse_array(G, nodelist=list(G.nodes()), weight=1)\n",
    "    N = len(G.nodes())\n",
    "    \n",
    "    # Plot table as a heatmap\n",
    "    a2.spy(A)\n",
    "    sns.despine(bottom=\"True\", left=True)\n",
    "    plt.grid(True)\n",
    "    a2.set_xticks(range(N), G.nodes(), rotation=90)\n",
    "    a2.set_yticks(range(N), G.nodes())\n",
    "    \n",
    "    plt.tight_layout()\n",
    "\n",
    "def plot_table(a1, cellText, rowLabels, colLabels):\n",
    "    \"\"\"\n",
    "    Plots a table in a figure. Highlights cells with edges.\n",
    "    \"\"\"\n",
    "    cellText = pd.DataFrame(cellText)\n",
    "    the_table = a1.table(cellText=cellText.values, rowLabels=rowLabels, colLabels=colLabels, loc='center', colLoc = \"left\", cellColours=(cellText>0).replace({False: \"white\", True:\"#e6af2e\"}).values)\n",
    "    a1.axis(False) \n",
    "    the_table.scale(0.8, 1.6)\n",
    "\n",
    "def adj_to_net(A, d_names = {0: \"Alice\", 1: \"Bob\", 2: \"John\", 3:\"Amy\", 4:\"Mike\", 5:\"Rose\"}):\n",
    "    \"\"\"\n",
    "    Create graph from adjacency matrix, rename nodes for clarity.\n",
    "    \"\"\"\n",
    "    G = nx.from_numpy_array(A, create_using=nx.DiGraph())\n",
    "    G = nx.relabel_nodes(G, d_names)\n",
    "    return G\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9ebce92",
   "metadata": {},
   "source": [
    "# Let's start creating and visualizing an example network\n",
    "\n",
    "We'll create small example networks to see how matrix representations work.\n",
    "\n",
    "**Note:** The `create_using` parameter specifies if the network is directed or undirected."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7d193e37",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Create and Visualize Example Networks ---\n",
    "# We'll create small example networks to see how matrix representations work.\n",
    "#\n",
    "# Note: The 'create_using' parameter specifies if the network is directed or undirected.\n",
    "\n",
    "# Small directed network to understand matrix multiplication\n",
    "G_dir = nx.from_edgelist([\n",
    "    (\"Alice\", \"Bob\"),\n",
    "    (\"John\", \"Alice\"),\n",
    "    (\"Bob\", \"John\"),\n",
    "    (\"Amy\", \"John\"),\n",
    "    (\"Mike\", \"John\"),\n",
    "    (\"Rose\", \"Alice\"),\n",
    "    (\"Mike\", \"Rose\")\n",
    "], create_using=nx.DiGraph())\n",
    "\n",
    "# Visualize the directed network and its adjacency matrix\n",
    "plot_network_adj(G_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "54585c50",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Create and Visualize an Undirected Network ---\n",
    "# This network has the same edges as above, but is undirected.\n",
    "#\n",
    "G_undir = nx.from_edgelist([\n",
    "    (\"Alice\", \"Bob\"),\n",
    "    (\"John\", \"Alice\"),\n",
    "    (\"Bob\", \"John\"),\n",
    "    (\"Amy\", \"John\"),\n",
    "    (\"Mike\", \"John\"),\n",
    "    (\"Rose\", \"Alice\"),\n",
    "    (\"Mike\", \"Rose\")\n",
    "], create_using=nx.Graph())\n",
    "\n",
    "# Visualize the undirected network and its adjacency matrix\n",
    "plot_network_adj(G_undir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51a04468",
   "metadata": {},
   "outputs": [],
   "source": [
    "# --- Add Node Metadata (Attributes) ---\n",
    "# Here we create a DataFrame with the number of children for each person.\n",
    "# This metadata will be used later for matrix operations.\n",
    "\n",
    "df = pd.DataFrame([[\"Alice\", 2],\n",
    "                  [\"Bob\",    0],\n",
    "                  [\"John\",   2],\n",
    "                  [\"Amy\",    0],\n",
    "                  [\"Mike\",   1],\n",
    "                  [\"Rose\",   5]], \n",
    "                  columns=[\"person\",\"children\"])\n",
    "\n",
    "df  # Display the metadata table"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c01b51a",
   "metadata": {},
   "source": [
    "# Exercise 1: Network representations\n",
    "\n",
    "**Goal:** Compare different ways to represent matrices and convert between them.\n",
    "\n",
    "We'll use the `G_dir` network for these tasks."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6f409eee",
   "metadata": {},
   "source": [
    "## 1.1 Convert the directed network to different formats\n",
    "- `numpy_array`: Fast, good for numerical operations.\n",
    "- `scipy_sparse_array`: Efficient for large, sparse networks.\n",
    "- `pandas_adjacency`: Easy to inspect as a table.\n",
    "- `pandas_edgelist`: Good for data manipulation and import/export.\n",
    "\n",
    "Use the `nx.to_...` functions. What are the advantages of each?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7cabece8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Example to convert the network to a sparse adjacency matrix\n",
    "A = nx.to_scipy_sparse_array(G_dir, nodelist=list(G_dir.nodes()), weight=1)\n",
    "A"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "362161a0",
   "metadata": {},
   "source": [
    "### 1.2 Visualize the adjacency matrix\n",
    "Use `plt.spy()` to see the structure of the adjacency matrix. Nonzero entries indicate edges."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a8f0b56d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Visualize the sparse adjacency matrix using a spy plot\n",
    "A = nx.to_scipy_sparse_array(G_dir, nodelist=list(G_dir.nodes()), weight=1)\n",
    "plt.spy(A)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "764792ab",
   "metadata": {},
   "source": [
    "## 1.3 Create a network from an edgelist\n",
    "\n",
    "Convert a pandas edgelist back to a graph. This is useful for importing/exporting network data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6ac3f23",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Starting from an edgelist \n",
    "df_edg = nx.to_pandas_edgelist(G_dir, nodelist=list(G_dir.nodes()))\n",
    "df_edg.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88866f25",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert to graph and plot\n",
    "G = nx.from_pandas_edgelist(df_edg, create_using=nx.DiGraph())\n",
    "\n",
    "plot_network_adj(G)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7da7ed82",
   "metadata": {},
   "source": [
    "## 1.4 Create a network from an adjacency matrix\n",
    "\n",
    "Convert a matrix back to a graph. Remember to relabel nodes if needed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "233dd0ea",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert the following adjacency matrix\n",
    "A = np.array([[0, 1, 0, 0, 0, 0],\n",
    "       [0, 0, 1, 0, 0, 0],\n",
    "       [1, 0, 0, 0, 0, 0],\n",
    "       [0, 0, 1, 0, 0, 0],\n",
    "       [0, 0, 1, 0, 0, 1],\n",
    "       [1, 0, 0, 0, 0, 0]])\n",
    "\n",
    "# Translation between index and names\n",
    "d_names = {0: 'Alice', 1: 'Bob', 2: 'John', 3: 'Amy', 4: 'Mike', 5: 'Rose'}\n",
    "A"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5e2d781",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert to graph\n",
    "G = nx.from_numpy_array(A, create_using=nx.DiGraph())\n",
    "# Add labels\n",
    "G = nx.relabel_nodes(G, d_names) \n",
    "# Plot\n",
    "plot_network_adj(G)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e5f848f3",
   "metadata": {},
   "source": [
    "# Exercise 2: Matrix multiplication = combining the attributes of the neighbors\n",
    "\n",
    "**Question:** What is the average number of children of your friends?\n",
    "- Use matrix multiplication (`@`) to sum the children of each node's neighbors.\n",
    "- Divide by the number of neighbors to get the average.\n",
    "- Visualize the result on the network.\n",
    "\n",
    "In python\n",
    "* Matrix multiplication (dot-product) is done using `@` (e.g. A @ B)\n",
    "    * The number of columns of A needs to match the number of rows of B!\n",
    "* Element-wise multiplication is done using `*` (e.g. A*B multiplies the element i,j of A with the element i,j of B)\n",
    "    * If B is a matrix (==array), the dimensions need to match those of A\n",
    "    * If B is a vector, it multiplies it row-wise (the number of rows of A need to match the number of elements of B)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "04be8ab4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Original network adding the children as a color/size\n",
    "plot_network_adj(G_undir, values = df[\"children\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a510bc2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# adjacency array\n",
    "A = nx.to_scipy_sparse_array(G_undir)\n",
    "\n",
    "# Calculate sum of neighbors children\n",
    "sum_children = A @ df[\"children\"]\n",
    "\n",
    "# Divide by number of neighbors to get an average\n",
    "avg_children = sum_children / A.sum(axis=1)\n",
    "\n",
    "print(sum_children)\n",
    "plot_network_adj(G_undir, values=avg_children)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7782ec56",
   "metadata": {},
   "source": [
    "# Exercise 3: Matrix multiplication and paths\n",
    "\n",
    "## 3a. Number of paths\n",
    "- The adjacency matrix to the power of n (`A^n`) gives the number of paths of length n between nodes.\n",
    "- Use this to find nodes 2 or 3 steps away.\n",
    "\n",
    "## 3b. Triangles and clustering\n",
    "- The diagonal of `A^3` counts triangles.\n",
    "- For undirected networks, divide by 2; for directed, use as is.\n",
    "- Compare with NetworkX's built-in clustering."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ed2b490d",
   "metadata": {},
   "source": [
    "## Exercise 3a: Matrix multiplication and paths"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae712801",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of paths to go from node i to node j in x steps\n",
    "\n",
    "# Find who is one step away (just the normal adjacency matrix)\n",
    "print(\"One step away\")\n",
    "plot_network_adj(G_dir)\n",
    "A = nx.to_scipy_sparse_array(G_dir)\n",
    "\n",
    "# Find who is two steps away\n",
    "print(\"Two steps away\")\n",
    "G2 = adj_to_net(A @ A) #A**2\n",
    "plot_network_adj(G2)\n",
    "\n",
    "# Find who is three steps away\n",
    "print(\"Three steps away. Triangles!\")\n",
    "G3 = adj_to_net(A @ A @ A) #A**3\n",
    "plot_network_adj(G3)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2058e0be",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of nodes reached in 1, 2 or 3 steps\n",
    "M = (A + A@A + A@A@A) > 0 # all nodes should be counted only once (e.g. doesn't matter that you can reach Rose in two different ways)\n",
    "M.setdiag(0) # do not count yourself\n",
    "M.todense().sum(1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6bd6646e",
   "metadata": {},
   "source": [
    "## Exercise 3b: Number of triangles and clustering"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a426cf21",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count number of triangles in the directed network\n",
    "plot_network_adj(G_dir)\n",
    "A = nx.to_scipy_sparse_array(G_dir)\n",
    "path_3 = A @ A @ A\n",
    "\n",
    "# For undirected newtorks there are two directions, for directed networks one\n",
    "display(list(zip(G_dir.nodes(), path_3.diagonal() )))\n",
    "\n",
    "# Number of triangles = trace / 3 (each  triangle count in 3 nodes)\n",
    "print(path_3.trace()/3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "250588dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count number of triangles in the undirected network\n",
    "plot_network_adj(G_undir)\n",
    "A = nx.to_scipy_sparse_array(G_undir)\n",
    "path_3 = A @ A @ A\n",
    "\n",
    "\n",
    "# For undirected newtorks there are two directions\n",
    "display(list(zip(G_undir.nodes(), path_3.diagonal() / 2)))\n",
    "\n",
    "# Number of triangles = trace / 6 (each  triangle count in 3 nodes, each triangle counted in two directions)\n",
    "print(path_3.trace()/3/2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eaa7e22c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Local clustering = number of triangles / number of potential links\n",
    "print(nx.clustering(G_undir)) #standard nx function\n",
    "A = nx.to_scipy_sparse_array(G_undir)\n",
    "\n",
    "# Number of triangles\n",
    "path_3 = (A@A@A).diagonal()/2 # divided by two because there are two directions\n",
    "\n",
    "# Number of potential links between neighbors\n",
    "degree = A.sum(1)\n",
    "potential_links = (degree*(degree-1)/2)\n",
    "\n",
    "pd.DataFrame(zip(G_undir.nodes(), \n",
    "         path_3, \n",
    "         potential_links, \n",
    "         path_3/potential_links,\n",
    "         nx.clustering(G_undir).values()\n",
    "        ),\n",
    "        columns=[\"Node\", \"Triangles\", \"Potential links\", \"Clustering coefficient\", \"Clustering coefficient (nx)\"]\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "86fbb95c",
   "metadata": {},
   "source": [
    "---"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "75672afb",
   "metadata": {},
   "source": [
    "# Exercise 4: Node centrality\n",
    "\n",
    "Centrality measures help identify important nodes.\n",
    "- **Degree:** Number of connections\n",
    "- **Closeness:** Inverse of average distance to all others\n",
    "- **Betweenness:** Fraction of shortest paths passing through\n",
    "- **Eigenvector:** Influence based on neighbors' importance\n",
    "- **Pagerank:** Like eigenvector, but for directed networks\n",
    "- **Katz, HITS:** Other advanced measures\n",
    "\n",
    "Use `plot_network_distribution` to visualize centrality as node color."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2806068",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load example graph\n",
    "G = nx.florentine_families_graph()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "541bfaf6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Degree centrality\n",
    "cent = nx.degree_centrality(G) \n",
    "cent = [cent[node] for node in G.nodes()]\n",
    "\n",
    "plot_network_distribution(G, cent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a9c21674",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Betweeness centrality\n",
    "cent = nx.betweenness_centrality(G) \n",
    "cent = [cent[node] for node in G.nodes()]\n",
    "\n",
    "plot_network_distribution(G, cent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c3e3686",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Closeness centrality\n",
    "cent = nx.closeness_centrality(G) \n",
    "cent = [cent[node] for node in G.nodes()]\n",
    "\n",
    "plot_network_distribution(G, cent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44a5a75d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Harmonic centrality\n",
    "cent = nx.harmonic_centrality(G) \n",
    "cent = [cent[node]/15 for node in G.nodes()]\n",
    "\n",
    "plot_network_distribution(G, cent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7646a57",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Eigenvector centrality\n",
    "cent = nx.eigenvector_centrality(G) \n",
    "cent = [cent[node] for node in G.nodes()]\n",
    "\n",
    "plot_network_distribution(G, cent)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "601aff6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Pagerank centrality\n",
    "cent = nx.pagerank(G) \n",
    "cent = [cent[node] for node in G.nodes()]\n",
    "\n",
    "plot_network_distribution(G, cent)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "451bea61",
   "metadata": {},
   "source": [
    "# Exercise 5: Centrality in real networks\n",
    "\n",
    "Apply centrality measures to the Twitter and PPI networks.\n",
    "- Who would you choose to spread news quickly? (assuming news are always passed on by the recipients)\n",
    "- Who connects different communities?\n",
    "- Who would you ask for information about who is most influential?\n",
    "- Who would be best for spreading unreliable news? (assuming news are sometimes not passed on)\n",
    "\n",
    "Calculate the top 10 nodes for each measure."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8dc03e4-8aa1-4230-9b7b-89986a3f0416",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read edgelist\n",
    "df = pd.read_csv(f\"{path_data}/ic2s2_netsci_3.tsv\", sep=\"\\t\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dcbf609e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert to networkx\n",
    "Gt = nx.from_pandas_edgelist(df, create_using=nx.DiGraph())\n",
    "Gt.remove_edges_from(nx.selfloop_edges(Gt)) #remove self-edges"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8599c54",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Util function to extract top 10 nodes based on centrality measures\n",
    "def extract_top_10(d_centrality):\n",
    "    return pd.DataFrame.from_dict(d_centrality, orient=\"index\").sort_values(by=0, ascending=False).head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52dda444-a6ac-4549-a105-8fc26f17d7c3",
   "metadata": {},
   "outputs": [],
   "source": [
    "cc = extract_top_10(nx.closeness_centrality(Gt)) #slow (all distances)\n",
    "bc = extract_top_10(nx.betweenness_centrality(Gt)) #slow (all shortest paths)\n",
    "ac = extract_top_10(nx.hits(Gt)[0]) #0=hubs, 1=authorities\n",
    "ec = extract_top_10(nx.pagerank(Gt))\n",
    "\n",
    "df = pd.concat([cc,bc,ac,ec], axis=1)\n",
    "df.columns = [\"closeness\",\"betweeness\",\"hubs\",\"pagerank\"]\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "09dd06dc-8cbf-47b9-b5c3-366021b1731f",
   "metadata": {},
   "source": [
    "### Doing the analysis on the PPI network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "888f8dee-e210-4e73-b958-c90fbbd8e1fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read PPI network\n",
    "path_network = f\"{path_data}/ppi_network_prediction.graphml\"\n",
    "G = nx.read_graphml(path_network, node_type=int)\n",
    "len(G.nodes()), len(G.edges())\n",
    "\n",
    "# Add labels\n",
    "mapping = {node: data['label'] for node, data in G.nodes(data=True) if 'label' in data}\n",
    "G = nx.relabel_nodes(G, mapping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77daab8b-f54e-4537-9771-7b6360185945",
   "metadata": {},
   "outputs": [],
   "source": [
    "cc = extract_top_10(nx.closeness_centrality(G)) #slow (all distances)\n",
    "bc = extract_top_10(nx.betweenness_centrality(G)) #slow (all shortest paths)\n",
    "ec = extract_top_10(nx.eigenvector_centrality(G))\n",
    "\n",
    "df = pd.concat([cc,bc,ec], axis=1)\n",
    "df.columns = [\"closeness\",\"betweeness\",\"eigenvalue\"]\n",
    "df\n",
    "\n",
    "#YLR291C: Involved in regulation of translational initiation. \n",
    "#Essential gene; conditional mutants are defective in autophagy and in Golgi localization of reporter proteins"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ed90fe8b",
   "metadata": {},
   "source": [
    "# Exercise 6: Eigenvector and Pagerank (advanced)\n",
    "\n",
    "Learn how eigenvector and pagerank centrality can be computed using the power method.\n",
    "- **Eigenvector:** Influence spreads to neighbors, repeated until convergence.\n",
    "- **Pagerank:** Random walkers move through the network, with some probability of jumping anywhere.\n",
    "\n",
    "Compare your manual results to NetworkX's built-in functions."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d8bf7528",
   "metadata": {},
   "source": [
    "## 6.1 Eigenvector\n",
    "\n",
    "For eigenvector centrality, each node has an influence of 1, that it is distributed to the neighbors. This process is done many times, until it converges."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7e83dbb5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Eigenvector centrality, manually\n",
    "N = len(G_dir)\n",
    "A = nx.to_numpy_array(G_undir)\n",
    "\n",
    "# Start with everybody having 1 unit of influence\n",
    "weight = np.ones(N)\n",
    "for i in range(100):\n",
    "    # spread influence to neighbors \n",
    "    weight = weight @ A\n",
    "\n",
    "    # normalize uwing the geometrical mean\n",
    "    weight = weight / np.sqrt(np.sum(weight**2))\n",
    "    \n",
    "    if i < 5:\n",
    "        print(G_undir.nodes())\n",
    "        print(weight)\n",
    "\n",
    "        plot_network(G_undir, None, values = weight)\n",
    "        plt.show()\n",
    "\n",
    "# Compare with the results of networkx\n",
    "print(list(zip(G_undir.nodes(), weight)))\n",
    "nx.eigenvector_centrality(G_undir)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9849d6c9",
   "metadata": {},
   "source": [
    "## 6.2 Pagerank\n",
    "For pagerank centrality, we start with 1 random walker distributed over all nodes, from there, it gets distributed to the neighbors (1/k of the random walker go to each of the k neighbors). This process is done many times, until it converges."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74f5b2e5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# PAgerank centrality, manually\n",
    "N = len(G_dir)\n",
    "A = nx.to_numpy_array(G_dir)\n",
    "\n",
    "## Calculate transition matrix (Row normalize matrix)\n",
    "# construct diagonal inverse degree matrix\n",
    "degree = A.sum(1)\n",
    "D = np.diag(1./degree, 0)\n",
    "A_hat = (D @ A)\n",
    "\n",
    "# random walkers are spread out evenly\n",
    "weight = np.ones(N)/N\n",
    "\n",
    "for i in range(1000):\n",
    "    # calculate where random walkers go next\n",
    "    weight = weight @ (0.85*A_hat + 0.15/N)\n",
    "\n",
    "# Compare results with networkx\n",
    "print(list(zip(G_dir.nodes(), weight)))\n",
    "nx.pagerank(G_dir)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0dbfd98f",
   "metadata": {},
   "source": [
    "## 6.3 Infinite random walker\n",
    "\n",
    "A random walker ends up a fraction of time in each node proportional to the degree of the node"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "55acfe29",
   "metadata": {},
   "outputs": [],
   "source": [
    "N = len(G_undir)\n",
    "A = nx.to_numpy_array(G_undir)\n",
    "\n",
    "## construct transition matrix (row normalised adjacency matrix)\n",
    "# construct diagonal inverse degree matrix\n",
    "degree = A.sum(1)\n",
    "D = np.diag(1./degree, 0)\n",
    "A_hat = (D @ A)\n",
    "\n",
    "# it does not matter where the walker starts\n",
    "weight = np.ones(N)/N\n",
    "\n",
    "for i in range(1000):\n",
    "    # calculate power\n",
    "    weight = weight @ (A_hat)\n",
    "\n",
    "# Normalize to match the scale of degree centrality \n",
    "weight = weight / np.sqrt(np.sum(weight**2))\n",
    "\n",
    "# Compare to networkx\n",
    "print(list(zip(G_undir.nodes(), weight)))\n",
    "nx.degree_centrality(G_undir)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "networks",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}