{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Availability\n",
    "\n",
    "Data for these notebooks can be found here: https://github.com/VHRanger/Graph-Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gc\n",
    "import networkx as nx\n",
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd\n",
    "import time\n",
    "import scipy\n",
    "import sklearn\n",
    "from sklearn import cluster, linear_model\n",
    "from sklearn.decomposition import TruncatedSVD\n",
    "from sklearn.preprocessing import MultiLabelBinarizer\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.multiclass import OneVsRestClassifier\n",
    "import sys\n",
    "import warnings # Silence perf warning\n",
    "\n",
    "sys.path.append(os.path.realpath('..'))\n",
    "\n",
    "import nodevectors\n",
    "import csrgraph as cg\n",
    "from csrgraph import methods\n",
    "from nodevectors.evaluation import link_pred\n",
    "from nodevectors.evaluation import graph_eval\n",
    "\n",
    "# From the related karateclub lib (on pip)\n",
    "# https://github.com/benedekrozemberczki/KarateClub\n",
    "from karateclub.node_embedding.neighbourhood import GraRep, NodeSketch, Walklets\n",
    "# UMAP to test (on pip)\n",
    "import umap\n",
    "\n",
    "warnings.simplefilter(\"ignore\")\n",
    "\n",
    "def nx_node_weights(G, method, **kwargs):\n",
    "    \"\"\"\n",
    "    Gather per-node scores from a networkX-style function into an array.\n",
    "\n",
    "    `method(G, **kwargs)` must return something indexable by node. Each node\n",
    "    is also used directly as the position written in the output array, so\n",
    "    nodes are expected to be valid indices into an array of length `len(G)`.\n",
    "    \"\"\"\n",
    "    weights = np.zeros(len(G))\n",
    "    scores = method(G, **kwargs)\n",
    "    for node in G.nodes:\n",
    "        # The node doubles as array index and lookup key\n",
    "        weights[node] = scores[node]\n",
    "    return weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### CONFIG\n",
    "# Values shared by the embedding cells below\n",
    "N_COMPONENTS = 6  # dimension of every resulting embedding\n",
    "SEED = 42  # RNG seed handed to the labeled tests\n",
    "TEST_SIZE = 0.2  # held-out share for link and label tests\n",
    "\n",
    "# Random walk resampling (used by the GLoVe and GGVec walk cells)\n",
    "RESAMPLE_WALKS = 30  # passed as `epochs` to random_walk_resample\n",
    "RESAMPLE_LEN = 5  # passed as `walklen` to random_walk_resample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### GRAPHS\n",
    "#### Uncomment one to choose which graph to run evaluation on\n",
    "\n",
    "# Reset the label holders first: without this, switching to a graph that\n",
    "# has no labels (or a different label kind) would silently reuse the labels\n",
    "# left in the kernel by the previously selected graph.\n",
    "labels = None\n",
    "mlabels = None\n",
    "\n",
    "#### Artificial random graphs\n",
    "# G = nx.binomial_graph(700, 0.6)\n",
    "# G, labels = graph_eval.make_cluster_graph(n_nodes=820, n_clusters=18, connections=1000, drop_pct=0.5)\n",
    "G, labels = graph_eval.make_weighed_cluster_graph(n_nodes=500, n_clusters=6, connections=1500, drop_pct=0.2, max_edge_weight=15)\n",
    "#### Social graphs\n",
    "# G, labels = graph_eval.make_blogcatalog(dedupe=True)\n",
    "# G, mlabels = graph_eval.make_blogcatalog(dedupe=False)\n",
    "# G, labels = graph_eval.make_email()\n",
    "# G, labels = graph_eval.get_karateclub(\"facebook\") # twitch, github, facebook, wikipedia\n",
    "# G = graph_eval.get_from_snap(url=\"http://snap.stanford.edu/data/facebook_combined.txt.gz\", sep=' ', header=None, comment='#')\n",
    "#### Biology Graphs\n",
    "# G, mlabels = graph_eval.get_n2v_ppi(\"../data/bioNEV/node2vec_PPI\")\n",
    "\n",
    "\n",
    "#### Needs OutOfBounds Nodes support from CSRGraphs to work\n",
    "# G = graph_eval.get_drugbank_ddi(\"../data/bioNEV/DrugBank_DDI\")\n",
    "# G, mlabels = graph_eval.get_mashup_ppi(\"../data/bioNEV/Mashup_PPI\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "clusters: 6\n",
      "Nodes: 500\n",
      "Edges: 17668\n",
      "connected: True\n"
     ]
    }
   ],
   "source": [
    "#### For Link Prediction: Split graph into train and test edge sets\n",
    "#### (All nodes are still present in both)\n",
    "G_train, testing_pos_edges = link_pred.split_train_test_graph(G, testing_ratio=TEST_SIZE)\n",
    "\n",
    "#### Lazy way to set up evaluation: probe for single-label targets first,\n",
    "#### then multilabel targets, otherwise run without label tests.\n",
    "#### `except Exception` keeps the best-effort fallback but, unlike a bare\n",
    "#### `except:`, still lets KeyboardInterrupt / SystemExit stop the run.\n",
    "try:\n",
    "    y = labels.label\n",
    "    n_clusters = y.nunique()\n",
    "    HAS_LABELS = True\n",
    "    print(f\"clusters: {n_clusters}\")\n",
    "except Exception:\n",
    "    try: # Multilabels \n",
    "        y = MultiLabelBinarizer().fit_transform(mlabels.mlabels)\n",
    "        HAS_LABELS = True\n",
    "        print(f\"multilabels: {y.shape[1]}\")\n",
    "    except Exception: # No Labels\n",
    "        HAS_LABELS = False\n",
    "        # Drop any target left over from an earlier run of this cell\n",
    "        y = None\n",
    "        print(\"No Labels\")\n",
    "NNODES = len(G)\n",
    "print(f\"Nodes: {NNODES}\\nEdges: {len(G.edges)}\\nconnected: {nx.is_connected(G_train)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loss: 0.4954\t:   2%|▏         | 101/6000 [00:02<02:51, 34.48it/s] \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converged! Loss: 0.4951\n",
      "Time: 2.9698\n",
      "Link Prediction:\n",
      "\t(logit) AUC-ROC: 0.516, AUC-PR: 0.507, Acc: 0.511, F1: 0.512\n",
      "\t(lgbm)  AUC-ROC: 0.734, AUC-PR: 0.707, Acc: 0.673, F1: 0.688\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loss: 0.4937\t:   2%|▏         | 101/6000 [00:00<00:16, 352.27it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converged! Loss: 0.4954\n",
      "MI: 0.18, RAND 0.30, FM: 0.30\n",
      "Label Prediction:\n",
      "\t(logit) Acc: 0.540, F1 micro: 0.540, F1 macro: 0.540\n",
      "\t(lgbm) Acc: 0.450, F1 micro: 0.450, F1 macro: 0.450\n"
     ]
    }
   ],
   "source": [
    "### GGVec on the raw graph ###\n",
    "ggvec_params = {\n",
    "    \"n_components\": N_COMPONENTS,\n",
    "    \"order\": 1,\n",
    "    \"tol\": 0.1,\n",
    "    \"tol_samples\": 100,\n",
    "    \"max_epoch\": 6_000,\n",
    "    \"learning_rate\": 0.1,\n",
    "    \"negative_ratio\": 0.05,\n",
    "    \"exponent\": 0.33,\n",
    "    \"verbose\": True,\n",
    "}\n",
    "\n",
    "# Link prediction: embed the training graph only and time the fit\n",
    "start_t = time.time()\n",
    "w_train = nodevectors.GGVec(**ggvec_params).fit_transform(G_train)\n",
    "\n",
    "print(f\"Time: {time.time() - start_t :.4f}\")\n",
    "result = link_pred.LinkPrediction(w_train, G, G_train, testing_pos_edges)\n",
    "# Short pause before the second fit -- presumably lets the progress output\n",
    "# flush; TODO confirm\n",
    "time.sleep(0.1)\n",
    "# Label tests: fit a fresh model on the full graph\n",
    "if HAS_LABELS:\n",
    "    w = nodevectors.GGVec(**ggvec_params).fit_transform(G)\n",
    "    graph_eval.print_labeled_tests(w, y, test_size=TEST_SIZE, seed=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Making walks... Done, T=1.92\n",
      "Mapping Walk Names... Done, T=0.10\n",
      "Training W2V... Done, T=0.32\n",
      "Time: 2.3881\n",
      "Link Prediction:\n",
      "\t(logit) AUC-ROC: 0.552, AUC-PR: 0.482, Acc: 0.539, F1: 0.534\n",
      "\t(lgbm)  AUC-ROC: 0.948, AUC-PR: 0.930, Acc: 0.918, F1: 0.920\n",
      "Making walks... Done, T=0.01\n",
      "Mapping Walk Names... Done, T=0.13\n",
      "Training W2V... Done, T=0.31\n",
      "MI: 0.93, RAND 0.86, FM: 0.86\n",
      "Label Prediction:\n",
      "\t(logit) Acc: 0.940, F1 micro: 0.940, F1 macro: 0.940\n",
      "\t(lgbm) Acc: 0.950, F1 micro: 0.950, F1 macro: 0.950\n"
     ]
    }
   ],
   "source": [
    "### Node2Vec ###\n",
    "n2v_params = {\n",
    "    \"n_components\": N_COMPONENTS,\n",
    "    \"epochs\": 5,\n",
    "    \"walklen\": 30,\n",
    "    \"return_weight\": 1.,\n",
    "    \"neighbor_weight\": 1.,\n",
    "    # Nested settings handed over under the `w2vparams` keyword\n",
    "    \"w2vparams\": dict(window=3, negative=5, iter=2, batch_words=128),\n",
    "}\n",
    "\n",
    "# Link prediction: embed the training graph only and time the fit\n",
    "start_t = time.time()\n",
    "w_train = nodevectors.Node2Vec(**n2v_params).fit_transform(G_train)\n",
    "print(f\"Time: {time.time() - start_t :.4f}\")\n",
    "result = link_pred.LinkPrediction(w_train, G, G_train, testing_pos_edges)\n",
    "# Label tests: fit a fresh model on the full graph\n",
    "if HAS_LABELS:\n",
    "    w = nodevectors.Node2Vec(**n2v_params).fit_transform(G)\n",
    "    graph_eval.print_labeled_tests(w, y, test_size=TEST_SIZE, seed=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time: 0.0773\n",
      "Link Prediction:\n",
      "\t(logit) AUC-ROC: 0.528, AUC-PR: 0.463, Acc: 0.538, F1: 0.540\n",
      "\t(lgbm)  AUC-ROC: 0.951, AUC-PR: 0.940, Acc: 0.928, F1: 0.928\n",
      "MI: 0.87, RAND 0.82, FM: 0.82\n",
      "Label Prediction:\n",
      "\t(logit) Acc: 0.980, F1 micro: 0.980, F1 macro: 0.980\n",
      "\t(lgbm) Acc: 0.990, F1 micro: 0.990, F1 macro: 0.990\n"
     ]
    }
   ],
   "source": [
    "### ProNE ###\n",
    "pne_params = {\n",
    "    \"n_components\": N_COMPONENTS,\n",
    "    \"step\": 5,\n",
    "    \"mu\": 0.2,\n",
    "    \"theta\": 0.5,\n",
    "}\n",
    "\n",
    "# Link prediction: embed the training graph only and time the fit\n",
    "start_t = time.time()\n",
    "pne = nodevectors.ProNE(**pne_params)\n",
    "w_train = pne.fit_transform(G_train)\n",
    "print(f\"Time: {time.time() - start_t :.4f}\")\n",
    "result = link_pred.LinkPrediction(w_train, G, G_train, testing_pos_edges)\n",
    "# Label tests: fit a fresh model on the full graph\n",
    "if HAS_LABELS:\n",
    "    pne = nodevectors.ProNE(**pne_params)\n",
    "    w = pne.fit_transform(G)\n",
    "    graph_eval.print_labeled_tests(w, y, test_size=TEST_SIZE, seed=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2/2 [00:00<00:00, 17.00it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time: 0.2583\n",
      "Link Prediction:\n",
      "\t(logit) AUC-ROC: 0.515, AUC-PR: 0.453, Acc: 0.565, F1: 0.599\n",
      "\t(lgbm)  AUC-ROC: 0.957, AUC-PR: 0.939, Acc: 0.941, F1: 0.940\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2/2 [00:00<00:00, 17.91it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MI: 1.00, RAND 1.00, FM: 1.00\n",
      "Label Prediction:\n",
      "\t(logit) Acc: 1.000, F1 micro: 1.000, F1 macro: 1.000\n",
      "\t(lgbm) Acc: 1.000, F1 micro: 1.000, F1 macro: 1.000\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "### GraRep with a TruncatedSVD embedder ###\n",
    "grarep_params = dict(\n",
    "    n_components=N_COMPONENTS,\n",
    "    order=2,\n",
    "    embedder=TruncatedSVD(\n",
    "        n_iter=10,\n",
    "        # Take the seed from the config cell instead of a separate literal,\n",
    "        # so changing SEED reseeds this embedder as well\n",
    "        random_state=SEED),\n",
    "    # Combine the per-order embeddings by summing them along the first axis\n",
    "    merger=(lambda x : np.sum(x, axis=0)),\n",
    ")\n",
    "\n",
    "# Link prediction: embed the training graph only and time the fit\n",
    "start_t = time.time()\n",
    "w_train = nodevectors.GraRep(**grarep_params).fit_transform(G_train)\n",
    "\n",
    "print(f\"Time: {time.time() - start_t :.4f}\")\n",
    "result = link_pred.LinkPrediction(w_train, G, G_train, testing_pos_edges)\n",
    "time.sleep(0.1)\n",
    "# Label tests: fit a fresh GraRep on the full graph\n",
    "if HAS_LABELS:\n",
    "    w = nodevectors.GraRep(**grarep_params).fit_transform(G)\n",
    "    graph_eval.print_labeled_tests(w, y, test_size=TEST_SIZE, seed=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loss: 0.0229\t:   4%|▍         | 228/6000 [00:02<01:11, 80.47it/s]\n",
      "Loss: 0.0241\t:   0%|          | 7/6000 [00:00<01:27, 68.87it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converged! Loss: 0.0225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loss: 0.0158\t:   4%|▎         | 216/6000 [00:03<01:23, 69.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converged! Loss: 0.0156\n",
      "Time: 6.0436\n",
      "Link Prediction:\n",
      "\t(logit) AUC-ROC: 0.534, AUC-PR: 0.465, Acc: 0.513, F1: 0.513\n",
      "\t(lgbm)  AUC-ROC: 0.953, AUC-PR: 0.939, Acc: 0.931, F1: 0.932\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loss: 0.0229\t:   4%|▎         | 218/6000 [00:02<01:07, 85.74it/s]\n",
      "Loss: 0.0243\t:   0%|          | 7/6000 [00:00<01:32, 64.95it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converged! Loss: 0.0229\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loss: 0.0155\t:   4%|▎         | 214/6000 [00:03<01:27, 66.29it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converged! Loss: 0.0155\n",
      "MI: 1.00, RAND 1.00, FM: 1.00\n",
      "Label Prediction:\n",
      "\t(logit) Acc: 1.000, F1 micro: 1.000, F1 macro: 1.000\n",
      "\t(lgbm) Acc: 1.000, F1 micro: 1.000, F1 macro: 1.000\n"
     ]
    }
   ],
   "source": [
    "##### GraRep + GGVec ####\n",
    "grarep_params = {\n",
    "    \"n_components\": N_COMPONENTS,\n",
    "    \"order\": 2,\n",
    "    # A GGVec instance takes the embedder slot in this variant\n",
    "    \"embedder\": nodevectors.GGVec(\n",
    "        n_components=N_COMPONENTS,\n",
    "        tol=0.1,\n",
    "        tol_samples=200,\n",
    "        max_epoch=6_000,\n",
    "        learning_rate=0.02,\n",
    "        negative_ratio=0.6,\n",
    "        exponent=0.33,\n",
    "        verbose=True,\n",
    "    ),\n",
    "    \"verbose\": False,\n",
    "    \"merger\": (lambda x : np.sum(x, axis=0)),\n",
    "}\n",
    "\n",
    "# Link prediction: embed the training graph only and time the fit\n",
    "start_t = time.time()\n",
    "w_train = nodevectors.GraRep(**grarep_params).fit_transform(G_train)\n",
    "\n",
    "print(f\"Time: {time.time() - start_t :.4f}\")\n",
    "result = link_pred.LinkPrediction(w_train, G, G_train, testing_pos_edges)\n",
    "time.sleep(0.1)\n",
    "# Label tests: fit a fresh GraRep on the full graph\n",
    "if HAS_LABELS:\n",
    "    w = nodevectors.GraRep(**grarep_params).fit_transform(G)\n",
    "    graph_eval.print_labeled_tests(w, y, test_size=TEST_SIZE, seed=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time: 3.8006\n",
      "Link Prediction:\n",
      "\t(logit) AUC-ROC: 0.541, AUC-PR: 0.472, Acc: 0.534, F1: 0.537\n",
      "\t(lgbm)  AUC-ROC: 0.952, AUC-PR: 0.938, Acc: 0.939, F1: 0.939\n",
      "MI: 1.00, RAND 1.00, FM: 1.00\n",
      "Label Prediction:\n",
      "\t(logit) Acc: 1.000, F1 micro: 1.000, F1 macro: 1.000\n",
      "\t(lgbm) Acc: 1.000, F1 micro: 1.000, F1 macro: 1.000\n"
     ]
    }
   ],
   "source": [
    "### UMAP through SKLearnEmbedder ###\n",
    "ump_params = {\n",
    "    \"embedder\": umap.UMAP,\n",
    "    \"n_neighbors\": 3,\n",
    "    \"min_dist\": 0.,\n",
    "    \"metric\": 'cosine',\n",
    "    \"normalize_graph\": True,\n",
    "    \"n_components\": N_COMPONENTS,\n",
    "}\n",
    "\n",
    "# Link prediction: embed the training graph only and time the fit\n",
    "start_t = time.time()\n",
    "w_train = nodevectors.SKLearnEmbedder(**ump_params).fit_transform(G_train)\n",
    "print(f\"Time: {time.time() - start_t :.4f}\")\n",
    "result = link_pred.LinkPrediction(w_train, G, G_train, testing_pos_edges)\n",
    "# Label tests: fit a fresh embedder on the full graph\n",
    "if HAS_LABELS:\n",
    "    w = nodevectors.SKLearnEmbedder(**ump_params).fit_transform(G)\n",
    "    graph_eval.print_labeled_tests(w, y, test_size=TEST_SIZE, seed=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  1%|▏         | 76/6000 [00:02<02:54, 33.95it/s]  \n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time: 2.9679\n",
      "Virtual edges: 53851\n",
      "Link Prediction:\n",
      "\t(logit) AUC-ROC: 0.535, AUC-PR: 0.472, Acc: 0.527, F1: 0.525\n",
      "\t(lgbm)  AUC-ROC: 0.944, AUC-PR: 0.936, Acc: 0.904, F1: 0.906\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "  5%|▌         | 327/6000 [00:01<00:24, 236.32it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MI: 1.00, RAND 1.00, FM: 1.00\n",
      "Label Prediction:\n",
      "\t(logit) Acc: 1.000, F1 micro: 1.000, F1 macro: 1.000\n",
      "\t(lgbm) Acc: 1.000, F1 micro: 1.000, F1 macro: 1.000\n"
     ]
    }
   ],
   "source": [
    "### GLoVe with random walks ###\n",
    "glove_params = {\n",
    "    \"n_components\": N_COMPONENTS,\n",
    "    \"tol\": 0.0005,\n",
    "    \"max_epoch\": 6_000,\n",
    "    \"learning_rate\": 0.02,\n",
    "    \"max_loss\": 10.,\n",
    "    \"max_count\": 50,\n",
    "    \"exponent\": 0.5,\n",
    "}\n",
    "\n",
    "# Link prediction: resample the training graph with random walks, then\n",
    "# embed the resampled graph; the timer covers both steps\n",
    "start_t = time.time()\n",
    "wg = cg.csrgraph(G_train).random_walk_resample(walklen=RESAMPLE_LEN, epochs=RESAMPLE_WALKS)\n",
    "w_train = nodevectors.Glove(**glove_params).fit_transform(wg)\n",
    "\n",
    "print(f\"Time: {time.time() - start_t :.4f}\")\n",
    "print(f\"Virtual edges: {wg.dst.size}\")\n",
    "result = link_pred.LinkPrediction(w_train, G, G_train, testing_pos_edges)\n",
    "# Label tests: repeat the resample + fit on the full graph\n",
    "if HAS_LABELS:\n",
    "    wg = cg.csrgraph(G).random_walk_resample(walklen=RESAMPLE_LEN, epochs=RESAMPLE_WALKS)\n",
    "    w = nodevectors.Glove(**glove_params).fit_transform(wg)\n",
    "    graph_eval.print_labeled_tests(w, y, test_size=TEST_SIZE, seed=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loss: 0.2861\t:  16%|█▌        | 967/6000 [00:05<00:26, 188.58it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converged! Loss: 0.2859\n",
      "Time: 5.6420\n",
      "Virtual edges: 54151\n",
      "Link Prediction:\n",
      "\t(logit) AUC-ROC: 0.534, AUC-PR: 0.485, Acc: 0.527, F1: 0.530\n",
      "\t(lgbm)  AUC-ROC: 0.958, AUC-PR: 0.944, Acc: 0.937, F1: 0.937\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loss: 0.2796\t:  15%|█▌        | 911/6000 [00:03<00:18, 270.13it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Converged! Loss: 0.2795\n",
      "MI: 1.00, RAND 1.00, FM: 1.00\n",
      "Label Prediction:\n",
      "\t(logit) Acc: 1.000, F1 micro: 1.000, F1 macro: 1.000\n",
      "\t(lgbm) Acc: 0.990, F1 micro: 0.990, F1 macro: 0.990\n"
     ]
    }
   ],
   "source": [
    "### GGVec with random walks ###\n",
    "# Note: this rebinds `ggvec_params` with walk-specific settings\n",
    "ggvec_params = {\n",
    "    \"n_components\": N_COMPONENTS,\n",
    "    \"tol\": 0.02,\n",
    "    \"tol_samples\": 200,\n",
    "    \"max_epoch\": 6_000,\n",
    "    \"learning_rate\": 0.02,\n",
    "    \"negative_ratio\": 0.3,\n",
    "    \"exponent\": 0.35,\n",
    "    \"verbose\": True,\n",
    "}\n",
    "\n",
    "# Link prediction: resample the training graph with random walks, then\n",
    "# embed the resampled graph; the timer covers both steps\n",
    "start_t = time.time()\n",
    "wg = cg.csrgraph(G_train).random_walk_resample(walklen=RESAMPLE_LEN, epochs=RESAMPLE_WALKS)\n",
    "w_train = wg.ggvec(**ggvec_params)\n",
    "\n",
    "print(f\"Time: {time.time() - start_t :.4f}\")\n",
    "print(f\"Virtual edges: {wg.dst.size}\")\n",
    "result = link_pred.LinkPrediction(w_train, G, G_train, testing_pos_edges)\n",
    "# Label tests: repeat the resample + fit on the full graph\n",
    "if HAS_LABELS:\n",
    "    wg = cg.csrgraph(G).random_walk_resample(walklen=RESAMPLE_LEN, epochs=RESAMPLE_WALKS)\n",
    "    w = wg.ggvec(**ggvec_params)\n",
    "    graph_eval.print_labeled_tests(w, y, test_size=TEST_SIZE, seed=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "###### Slooooowwwwwww ########\n",
    "# walklets_params = dict(\n",
    "#     walk_number=10, \n",
    "#     walk_length=30, \n",
    "#     dimensions=N_COMPONENTS,\n",
    "#     window_size=4,\n",
    "#     epochs=1, \n",
    "#     learning_rate=0.05\n",
    "# )\n",
    "\n",
    "# try: # Karateclub models don't handle certain graphs\n",
    "#     start_t = time.time()\n",
    "#     model = Walklets(**walklets_params)\n",
    "#     model.fit(G_train)\n",
    "#     print(f\"Time: {time.time() - start_t :.3f}\")\n",
    "#     w_train = model.get_embedding()\n",
    "#     result = link_pred.LinkPrediction(w_train, G, G_train, testing_pos_edges)\n",
    "#     if HAS_LABELS:\n",
    "#         model = Walklets(**walklets_params)\n",
    "#         model.fit(G)\n",
    "#         w = model.get_embedding()\n",
    "#         graph_eval.print_labeled_tests(w, y, test_size=TEST_SIZE, seed=SEED)\n",
    "# except: pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "### Completely random baseline ###\n",
    "\n",
    "# Draw the embedding from a generator seeded with SEED so the baseline\n",
    "# numbers are reproducible across runs\n",
    "w = np.random.RandomState(SEED).randn(len(G), N_COMPONENTS)\n",
    "\n",
    "result = link_pred.LinkPrediction(w, G, G_train, testing_pos_edges)\n",
    "# Same guard as the embedding cells: run label tests only when labels exist,\n",
    "# and let real failures surface instead of swallowing them silently\n",
    "if HAS_LABELS:\n",
    "    graph_eval.print_labeled_tests(w, y, test_size=TEST_SIZE, seed=SEED)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}