{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Comparison of different sampling strategies for GDS" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "application/javascript": [ "IPython.OutputArea.auto_scroll_threshold = 9999;\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%%javascript\n", "IPython.OutputArea.auto_scroll_threshold = 9999;" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "# Importing needed libraries \n", "import matplotlib\n", "import networkx as nx\n", "import random\n", "import numpy as np\n", "import copy\n", "import matplotlib.pyplot as plt\n", "from sklearn.cluster import KMeans\n", "\n", "from scipy.sparse import csgraph\n", "from scipy.sparse.linalg import eigsh, svds\n", "from scipy.linalg import eigh, orth, null_space\n", "\n", "from IPython.display import display, clear_output\n", "import ipywidgets as widgets\n", "from ipywidgets import Button, Layout, GridspecLayout" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [], "source": [ "# nbi:hide_in\n", "\n", "# Creating the graph class\n", "class Graph(object):\n", " r\"\"\"\n", " Args:\n", " edges ([num_edges, 3] array): Graph connectivity in COO format \n", " (instead of saving the adjacency matrix coo format saves only the node \n", " values so the weights need to be given separetely). Third argument is \n", " the weight. \n", " \"\"\"\n", " def __init__(self, numNodes=1, edges=[], samples=[], **kwargs):\n", " self.edges = edges\n", " self.numNodes = numNodes\n", " self.nodes = [i for i in range(numNodes)]\n", " self.samples = samples\n", " self.pos = None\n", " \n", " def adj(self):\n", " adjacency_matr = np.zeros([self.numNodes, self.numNodes])\n", " for idx, row in enumerate(self.edges):\n", " ind1 = self.nodes.index(row[0])\n", " ind2 = self.nodes.index(row[1])\n", " adjacency_matr[ind1, ind2] = row[2]\n", " adjacency_matr[ind2, ind1] = adjacency_matr[ind1, ind2]\n", " return adjacency_matr\n", " \n", " def degrees(self):\n", " adj = self.adj()\n", " degrees = np.sum(adj, axis=0)\n", " return degrees\n", " \n", " def laplacian(self):\n", " Adj = self.adj()\n", " D = np.diag(self.degrees())\n", " Lap = D - Adj\n", " return Lap\n", " \n", " def add_node(self):\n", " self.numNodes += 1\n", " self.nodes.append(max(self.nodes)+1)\n", " \n", " def add_edge(self, edge):\n", " if edge!=None:\n", " self.edges.append(edge)\n", " \n", " def add_sample(self, node):\n", " if node not in self.samples:\n", " self.samples.append(node)\n", " \n", " def del_sample(self, node):\n", " if node in self.samples:\n", " self.samples.remove(node)\n", " \n", " def del_node(self, node):\n", " if node in self.nodes:\n", " self.numNodes-=1\n", " self.edges = [item for item in self.edges if item[0]!=node and item[1]!=node]\n", " self.nodes.remove(node)\n", " self.del_sample(node)\n", " \n", " def del_edge(self, pair):\n", " self.edges[:] = [item for item in self.edges if item[:2]!=pair and item[:2]!=(pair[1], pair[0])]\n", " \n", " def change_edge(self, newedge):\n", " for edge in self.edges:\n", " if (edge[0], edge[1])==(newedge[0], newedge[1]) or (edge[1], edge[0])==(newedge[0], newedge[1]):\n", " self.del_edge((newedge[0], newedge[1]))\n", " self.add_edge(newedge)\n", " \n", " #reset graph\n", " def reset(self):\n", " self.numNodes = 1\n", " self.nodes = [i for i in range(self.numNodes)]\n", " self.edges = []\n", " self.pos=None\n", " \n", " def lapl_eigen(self, dim=None):\n", " Lap=self.laplacian()\n", " if dim==None:\n", " dim=G.numNodes\n", " vals, U = eigh(Lap, subset_by_index=[0,dim-1])\n", " return vals, U\n", " \n", " def adjacent2(self):\n", " \"\"\"Return the adjoint nodes for given node\"\"\"\n", " adjacency = {node:[] for node in self.nodes}\n", " for edge in self.edges:\n", " adjacency[edge[0]].append(edge[1])\n", " adjacency[edge[1]].append(edge[0])\n", " return adjacency\n", " \n", " def is_connected(self):\n", " \"\"\"Check if the graph is connected using width-first search\"\"\"\n", " adjacency = self.adjacent2()\n", " count=0\n", " found = {i:False for i in self.nodes}\n", " Q = []\n", " Q.append(0)\n", " while Q: # checks if Q is empty\n", " nhbs = adjacency[Q[0]]\n", " for node in nhbs:\n", " if found[node]==False:\n", " count+=1\n", " found[node]=True\n", " Q.append(node)\n", " Q.pop(0)\n", " if count==self.numNodes:\n", " return True\n", " else:\n", " return False\n", " \n", " def draw(self, ax, output=None, update=False, title=None, show_eigen=False, k=None, labels=None):\n", " #create the networkx graph\n", " Gnx = nx.Graph()\n", " Gnx.add_nodes_from(self.nodes)\n", " Gnx.add_weighted_edges_from(self.edges)\n", " if self.pos==None or update==True:\n", " self.pos = nx.spring_layout(Gnx)\n", " \n", " # colors\n", " if labels!=None:\n", " if k==None:\n", " k=len(set(labels))\n", " colors = plt.cm.get_cmap('tab20', k)\n", " color_label = colors(labels)\n", " node_colors = [(1.0, 1.0, 0.7, 1.0) if node in G.samples else color_label[node] for node in G.nodes]\n", " node_edges = [color_label[node] for node in G.nodes]\n", " else:\n", " node_colors = [(1.0, 1.0, 0.7, 1.0) if node in G.samples else (0.15, 0.5, 0.7, 1.) for node in G.nodes]\n", " node_edges = (0.15, 0.5, 0.7, 1.)\n", " \n", " #plot \n", " if output==None:\n", " ax.cla()\n", " nx.draw_networkx(Gnx, ax=ax, node_color=node_colors, edgecolors=node_edges, node_size=400, \n", " pos=self.pos)\n", " ax.set_title(title)\n", " display(fig); \n", " else:\n", " with output: \n", " ax.cla()\n", " nx.draw_networkx(Gnx, ax=ax, node_color=node_colors, edgecolors=node_edges, node_size=400, \n", " pos=self.pos)\n", " ax.set_title(title)\n", " display(fig); \n", " \n", " if show_eigen==True:\n", " eig, U = self.lapl_eigen()\n", " display(\"Laplacian eigenvalues are\")\n", " display(eig)\n", " display(\"Laplacian eigenvectors are\")\n", " display(U)\n" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "def dynamic(A, numIter, V):\n", " Mat = np.eye(A.shape[0])\n", " for i in range(numIter-1):\n", " Mat = np.concatenate([np.eye(A.shape[0]), Mat @ A])\n", " F = Mat @ V\n", " return F.reshape(A.shape[0], numIter*V.shape[1], order=\"F\")\n", " \n", "def gds(G, pw_dim, numIter, output, options=0):\n", " # sampling matrix\n", " S = np.zeros([G.numNodes, len(G.samples)])\n", " for j, node in enumerate(G.samples):\n", " i = G.nodes.index(node)\n", " S[i, j]=1\n", " \n", " # Compute PW eigenvectors\n", " vals, U = G.lapl_eigen(pw_dim)\n", " \n", " # Compute the dynamical sampling vectors\n", " if options==0:\n", " Lap=G.laplacian()\n", " B = dynamic(Lap, numIter, S)\n", " if options==1:\n", " Adj=G.adj()\n", " B = dynamic(Adj, numIter, S)\n", " \n", " # Project onto PW space\n", " PF = U.transpose() @ B\n", " \n", " # Compute frame bounds\n", " Frame_op = PF @ PF.transpose()\n", " low = svds(Frame_op, k=1, which='SM', return_singular_vectors=False)[0]\n", " up = svds(Frame_op, k=1, which='LM', return_singular_vectors=False)[0]\n", "# cond = np.linalg.cond(Frame_op)\n", "\n", " # print\n", " if output==None:\n", " cond = np.linalg.cond(Frame_op)\n", " return cond\n", " else:\n", " with output:\n", " display(\"Lower frame bound = {:.2e}\".format(low), \"Upper frame bound = {:.2e}\".format(up))\n", " if low!=0:\n", " display(\"Condition number = {:.2e}\".format(up/low))\n", " # display(\"Condition number form numpy = {:.2e}\".format(cond))\n" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "# Generate a random connected graph \n", "def random_connected_graph(numNodes, numEdges):\n", " \"\"\"Uses rejection sampling to uniformly sample a connected graph\"\"\"\n", " G = Graph(numNodes)\n", " \n", " if numEdgesnumNodes*(numNodes):\n", " raise ValueError(\"Too many edges\")\n", " all_edges = [(i,j,1) for i in range(numNodes) for j in range(i)]\n", " while True:\n", " G.edges = random.sample(all_edges, numEdges)\n", " if G.is_connected():\n", " break\n", " # breadth first search to determine if it is connected\n", " return G" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [], "source": [ "# Samples from the cluster using sklearn\n", "\n", "def Kmeans_sklrn(X, k):\n", " \"\"\"Assigns the rows of X into k clusters\"\"\"\n", " kmeans = KMeans(n_clusters=k).fit(X)\n", " labels = list(kmeans.labels_)\n", " \n", " clusters = {i:[] for i in range(k)}\n", " \n", " for indx, item in enumerate(labels):\n", " clusters[item].append(indx)\n", " \n", " return labels, clusters\n", "\n", "def clustered_samples(clusters, degrees, order=\"min\"):\n", " \"\"\"In each cluster pick the node with largest degree\"\"\"\n", " if order==\"min\":\n", " samples = [clusters[key][np.argmin(degrees[clusters[key]])] for key in clusters.keys()]\n", " if order==\"max\":\n", " samples = [clusters[key][np.argmax(degrees[clusters[key]])] for key in clusters.keys()]\n", " return samples " ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "# Greedy sample\n", "\n", "def dynamic_basis(G, numIter, tol=0.1):\n", " Lap=G.laplacian()\n", " basis = {}\n", " for node in G.nodes:\n", " # the Diract vector\n", " vec = np.zeros([G.numNodes,1])\n", " i = G.nodes.index(node)\n", " vec[i] = 1\n", " # the iterated system \n", " B = dynamic(Lap, numIter, vec)\n", " Bsvd = orth(B, tol)\n", " basis[node] = Bsvd\n", " return basis\n", " \n", "\n", "def greedy_node(G, samples, basis, U, criterion):\n", " scores = {}\n", " for node in G.nodes:\n", " if node not in samples:\n", " Bsvd = basis[node]\n", " C = U.transpose() @ Bsvd\n", " if criterion==\"frob\":\n", " scores[node] = np.linalg.norm(C)\n", " elif criterion==\"cond\":\n", " scores[node] = np.linalg.cond(C)\n", " if criterion==\"frob\":\n", " # add the new sample with largest correlation\n", " new_sample = max(scores, key=scores.get) \n", " elif criterion==\"cond\":\n", " # add the new sample with smallest condition number\n", " new_sample = min(scores, key=scores.get) \n", " samples.append(new_sample)\n", " # update the recovery space\n", " V = basis[new_sample]\n", " Proj = U.transpose() @ V\n", " basisProj = orth(Proj)\n", " complement = np.eye(U.shape[1]) - basisProj @ basisProj.transpose()\n", " U = orth(U @ complement)\n", " return samples, U\n", " \n", "\n", "def greedy_samples(G, pw_dim, numIter, numSamples, criterion=\"frob\"):\n", " # Compute PW eigenvectors\n", " vals, U = G.lapl_eigen(pw_dim)\n", " basis = dynamic_basis(G, numIter)\n", " samples = []\n", " \n", " for i in range(numSamples):\n", " samples, U = greedy_node(G, samples, basis, U, criterion)\n", " if U.shape[1]==0:\n", " break\n", " return samples" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [], "source": [ "# Sample using the A^n\n", "def degree_sample(Adj, k, s, order=\"min\"):\n", " ones = np.ones([Adj.shape[0],])\n", " importance = np.linalg.matrix_power(Adj, k) @ ones\n", " if order==\"max\":\n", " samples = np.argpartition(importance, -s)[-s:]\n", " if order==\"min\":\n", " samples = np.argpartition(importance, s)[:s]\n", " return samples.tolist()" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "# Sample using largest minor \n", "\n", "from itertools import combinations\n", "\n", "def all_subsets(N,s):\n", " \"\"\"Returns all subsets of size s\"\"\"\n", " set = np.arange(N)\n", " return list(combinations(set, s))\n", "\n", "def minor_sample_double(Adj, k, s):\n", " Mat = np.linalg.matrix_power(Adj, k)\n", " samples_list = samples(Adj.shape[0],s) \n", " numOptions = len(samples_list)\n", " if numOptions>10000:\n", " raise ValueError(\"Matrix is too large\") \n", " vals = np.zeros([N_options, numOptions])\n", " for i in range(numOptions):\n", " for j in range(numOptions):\n", " row_slice = samples_list[i]\n", " column_slice = samples_list[j]\n", " vals[i,j] = np.sum(Mat[row_slice, columns_slice])\n", " i,j = np.unravel_index(np.argmin(vals), vals.shape)\n", " \n", " return samples_list[i], samples_list[j] \n", "\n", "def minor_sample(Adj, k, s):\n", " Mat = np.linalg.matrix_power(Adj, k)\n", " samples_list = all_subsets(Adj.shape[0],s) \n", " numOptions = len(samples_list)\n", " if numOptions>1000000:\n", " raise ValueError(\"Too large\") \n", " vals = np.zeros(numOptions)\n", " for i in range(numOptions):\n", " sample = samples_list[i]\n", " vals[i] = np.sum(Mat[np.ix_(sample, sample)])\n", " ind = np.argmin(vals) \n", " return samples_list[ind]" ] }, { "cell_type": "code", "execution_count": 70, "metadata": {}, "outputs": [], "source": [ "# The figure\n", "fig, ax = plt.subplots(figsize=(10, 10))\n", "ax.spines[\"top\"].set_visible(False) \n", "ax.spines[\"right\"].set_visible(False)\n", "ax.spines[\"bottom\"].set_visible(False)\n", "ax.spines[\"left\"].set_visible(False)\n", "plt.close()\n", "\n", "output = widgets.Output()" ] }, { "cell_type": "code", "execution_count": 71, "metadata": {}, "outputs": [], "source": [ "# generate a random graph\n", "numNodes=30\n", "numEdges=36\n", "G = random_connected_graph(numNodes, numEdges)" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7276a8a0498e4a939ea5f3dc383a5424", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# set up samples for different algorithms and plot\n", "\n", "numSamples=5\n", "numIter=10\n", "pwDim=5\n", "\n", "output.clear_output()\n", "\n", "# add eigenvalues to the output\n", "eig, U = G.lapl_eigen()\n", "with output:\n", " display(\"Laplacian eigenvalues\")\n", " display(eig)\n", "\n", "# sampled with spectral clusters\n", "_, X = G.lapl_eigen(numSamples)\n", "labels, clusters = Kmeans_sklrn(X, k=numSamples) \n", "degrees = G.degrees()\n", "G.samples = clustered_samples(clusters, degrees, order=\"min\")\n", "gds(G, pwDim, numIter, output)\n", "G.draw(ax, output, labels=labels, title=\"Clustered\")\n", "\n", "# greedy sample with frob\n", "G.samples = greedy_samples(G, pwDim, numIter, numSamples, criterion=\"frob\")\n", "gds(G, pwDim, numIter, output)\n", "G.draw(ax, output, title=\"Greedy\")\n", "\n", "# # greedy sample with cond\n", "# G.samples = greedy_samples(G, pwDim, numIter, numSamples, criterion=\"cond\")\n", "# gds(G, pwDim, numIter, output)\n", "# G.draw(ax, output)\n", "\n", "# best sample\n", "samples_list = all_subsets(G.numNodes, numSamples) \n", "numOptions = len(samples_list)\n", "if numOptions>1000000:\n", " raise ValueError(\"Too large\") \n", "vals = np.zeros(numOptions)\n", "for i in range(numOptions):\n", " G.samples = samples_list[i]\n", " vals[i] = gds(G, pwDim, numIter, output=None)\n", "ind = np.argmin(vals)\n", "G.samples = samples_list[ind]\n", "gds(G, pwDim, numIter, output)\n", "G.draw(ax, output, title=\"Best\") \n", " \n", "\n", "# # sampled with random samples\n", "# G.samples = random.sample(G.nodes, numSamples)\n", "# gds(G, pwDim, numIter, output)\n", "# G.draw(ax, output)\n", "\n", "# sampled with A^n\n", "G.samples = degree_sample(G.adj(), numSamples, numSamples, order=\"min\")\n", "gds(G, pwDim, numIter, output) \n", "G.draw(ax, output, title=\"A^n\")\n", "\n", "\n", "# # sampled with A^n minors\n", "# G.samples = minor_sample(G.adj(), numSamples, numSamples)\n", "# gds(G, pwDim, numIter, output) \n", "# G.draw(ax, output)\n", "\n", "display(output)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 }