{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Test the GMF module under folder [cf_ec2](../cf_ec2) with ml-1m dataset, save the best model (using integrated modules with compile and fit components)\n", "\n", "#### 3/18/2020" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using TensorFlow backend.\n" ] } ], "source": [ "import numpy as np \n", "import pandas as pd\n", "import keras\n", "from keras import Model\n", "from keras.regularizers import l2\n", "from keras.optimizers import (\n", " Adam,\n", " Adamax,\n", " Adagrad,\n", " SGD,\n", " RMSprop\n", ")\n", "from keras.layers import (\n", " Embedding, \n", " Input,\n", " Flatten, \n", " Multiply, \n", " Concatenate,\n", " Dense\n", ")\n", "\n", "import sys\n", "sys.path.append('../')\n", "from cf_ec2 import (\n", " GMF,\n", " MLP,\n", " NCF,\n", " Data,\n", " evaluation,\n", " evaluation_grouped\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## step 1: load the data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "train = pd.read_csv('../data/ml-1m.train.rating',sep='\\t',header=None,names=['user','item','rating','event_ts'])\n", "test = pd.read_csv('../data/ml-1m.test.rating',sep='\\t',header=None,names=['user','item','rating','event_ts'])" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(6040, (6040, 4))" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.user.nunique(), test.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## step 2: prepare the data for gmf model training" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "dataset = Data(\n", " train=train,\n", " test=test,\n", " col_user='user',\n", " col_item='item',\n", " col_rating='rating',\n", " col_time='event_ts',\n", " binary=True,\n", " n_neg=4,\n", " n_neg_test=100\n", ")\n", "dataset.prepTrainDNN(negSample=True)\n", "dataset.prepTestDNN(group=False)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useritem_interacteditem_negative
00{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...{52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 6...
11{15, 22, 31, 34, 35, 42, 43, 52, 53, 54, 55, 5...{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...
22{2, 135, 136, 14, 18, 147, 159, 163, 36, 40, 1...{0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15...
\n", "
" ], "text/plain": [ " user item_interacted \\\n", "0 0 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,... \n", "1 1 {15, 22, 31, 34, 35, 42, 43, 52, 53, 54, 55, 5... \n", "2 2 {2, 135, 136, 14, 18, 147, 159, 163, 36, 40, 1... \n", "\n", " item_negative \n", "0 {52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 6... \n", "1 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,... \n", "2 {0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15... " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataset.interaction_train.head(3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### prepare the test dataset" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "newItems = set(dataset.items_test)-set(dataset.items)\n", "idx2del = []\n", "for idx,item in enumerate(dataset.items_test):\n", " if item in newItems:\n", " idx2del.append(idx)\n", "\n", "length_test_original = len(dataset.users_test)\n", "dataset.users_test = [\n", " dataset.users_test[idx]\n", " for idx in range(length_test_original) if idx not in idx2del\n", "]\n", "dataset.items_test = [\n", " dataset.items_test[idx]\n", " for idx in range(length_test_original) if idx not in idx2del\n", "]\n", "dataset.ratings_test = [\n", " dataset.ratings_test[idx]\n", " for idx in range(length_test_original) if idx not in idx2del\n", "]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## step 3: create the model architecture and fit model with training data" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": false }, "outputs": [], "source": [ "n_users = 6040\n", "n_items = 3704\n", "n_factors_gmf = 32\n", "layers_mlp = [64,32,16,8]\n", "reg_gmf = 0.\n", "reg_layers_mlp = [0.,0.,0.,0.]\n", "learning_rate = 0.01\n", "flg_pretrain = ''\n", "filepath = ''\n", "filepath_gmf_pretrain = ''\n", "filepath_mlp_pretrain = ''\n", "num_epochs = 20\n", "batch_size = 100\n", "\n", "\n", "gmf = GMF(\n", " n_users=n_users,\n", " n_items=n_items,\n", " n_factors_gmf=n_factors_gmf\n", ")\n", "gmf.create_model()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "gmf.compile(learning_rate=0.01)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/xyin/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", " \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. 
\"\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Train on 4970845 samples, validate on 610038 samples\n", "Epoch 1/20\n", " - 140s - loss: 0.3344 - accuracy: 0.8515 - val_loss: 0.1743 - val_accuracy: 0.9315\n", "\n", "Epoch 00001: val_loss improved from inf to 0.17435, saving model to /Users/xyin/Documents/work/projects/rec_utils/metadata/gmf/gmf-weights-improvement-01-0.1743.hdf5\n", "Epoch 2/20\n", " - 134s - loss: 0.3026 - accuracy: 0.8685 - val_loss: 0.1670 - val_accuracy: 0.9302\n", "\n", "Epoch 00002: val_loss improved from 0.17435 to 0.16697, saving model to /Users/xyin/Documents/work/projects/rec_utils/metadata/gmf/gmf-weights-improvement-02-0.1670.hdf5\n", "Epoch 3/20\n", " - 139s - loss: 0.3021 - accuracy: 0.8699 - val_loss: 0.1932 - val_accuracy: 0.9173\n", "\n", "Epoch 00003: val_loss did not improve from 0.16697\n", "Epoch 4/20\n", " - 133s - loss: 0.3051 - accuracy: 0.8699 - val_loss: 0.1588 - val_accuracy: 0.9335\n", "\n", "Epoch 00004: val_loss improved from 0.16697 to 0.15884, saving model to /Users/xyin/Documents/work/projects/rec_utils/metadata/gmf/gmf-weights-improvement-04-0.1588.hdf5\n", "Epoch 5/20\n", " - 131s - loss: 0.3075 - accuracy: 0.8702 - val_loss: 0.1795 - val_accuracy: 0.9208\n", "\n", "Epoch 00005: val_loss did not improve from 0.15884\n", "Epoch 6/20\n", " - 132s - loss: 0.3101 - accuracy: 0.8698 - val_loss: 0.1624 - val_accuracy: 0.9293\n", "\n", "Epoch 00006: val_loss did not improve from 0.15884\n", "Epoch 7/20\n", " - 127s - loss: 0.3130 - accuracy: 0.8695 - val_loss: 0.2141 - val_accuracy: 0.9046\n", "\n", "Epoch 00007: val_loss did not improve from 0.15884\n", "Epoch 8/20\n", " - 126s - loss: 0.3158 - accuracy: 0.8690 - val_loss: 0.2003 - val_accuracy: 0.9094\n", "\n", "Epoch 00008: val_loss did not improve from 0.15884\n", "\n", "Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0029999999329447745.\n", "Epoch 9/20\n", " - 135s - loss: 0.2802 - accuracy: 0.8809 - val_loss: 0.1479 - val_accuracy: 0.9366\n", "\n", "Epoch 00009: val_loss improved from 0.15884 to 0.14790, saving model to /Users/xyin/Documents/work/projects/rec_utils/metadata/gmf/gmf-weights-improvement-09-0.1479.hdf5\n", "Epoch 10/20\n", " - 128s - loss: 0.2759 - accuracy: 0.8829 - val_loss: 0.1420 - val_accuracy: 0.9403\n", "\n", "Epoch 00010: val_loss improved from 0.14790 to 0.14199, saving model to /Users/xyin/Documents/work/projects/rec_utils/metadata/gmf/gmf-weights-improvement-10-0.1420.hdf5\n", "Epoch 11/20\n", " - 126s - loss: 0.2711 - accuracy: 0.8854 - val_loss: 0.1357 - val_accuracy: 0.9434\n", "\n", "Epoch 00011: val_loss improved from 0.14199 to 0.13571, saving model to /Users/xyin/Documents/work/projects/rec_utils/metadata/gmf/gmf-weights-improvement-11-0.1357.hdf5\n", "Epoch 12/20\n", " - 137s - loss: 0.2657 - accuracy: 0.8884 - val_loss: 0.1365 - val_accuracy: 0.9438\n", "\n", "Epoch 00012: val_loss did not improve from 0.13571\n", "Epoch 13/20\n", " - 126s - loss: 0.2604 - accuracy: 0.8912 - val_loss: 0.1747 - val_accuracy: 0.9252\n", "\n", "Epoch 00013: val_loss did not improve from 0.13571\n", "Epoch 14/20\n", " - 124s - loss: 0.2561 - accuracy: 0.8932 - val_loss: 0.1606 - val_accuracy: 0.9323\n", "\n", "Epoch 00014: val_loss did not improve from 0.13571\n", "Epoch 15/20\n", " - 133s - loss: 0.2533 - accuracy: 0.8949 - val_loss: 0.1698 - val_accuracy: 0.9282\n", "\n", "Epoch 00015: val_loss did not improve from 0.13571\n", "\n", "Epoch 00015: ReduceLROnPlateau reducing learning rate to 0.0009000000078231095.\n", "Epoch 16/20\n", " - 130s - 
loss: 0.2304 - accuracy: 0.9041 - val_loss: 0.1834 - val_accuracy: 0.9216\n", "\n", "Epoch 00016: val_loss did not improve from 0.13571\n", "Epoch 17/20\n", " - 128s - loss: 0.2288 - accuracy: 0.9049 - val_loss: 0.1515 - val_accuracy: 0.9374\n", "\n", "Epoch 00017: val_loss did not improve from 0.13571\n", "Epoch 18/20\n", " - 127s - loss: 0.2277 - accuracy: 0.9056 - val_loss: 0.1380 - val_accuracy: 0.9438\n", "\n", "Epoch 00018: val_loss did not improve from 0.13571\n", "Epoch 19/20\n", " - 128s - loss: 0.2268 - accuracy: 0.9060 - val_loss: 0.2042 - val_accuracy: 0.9120\n", "\n", "Epoch 00019: val_loss did not improve from 0.13571\n", "\n", "Epoch 00019: ReduceLROnPlateau reducing learning rate to 0.00026999999536201356.\n", "Epoch 20/20\n", " - 132s - loss: 0.2190 - accuracy: 0.9094 - val_loss: 0.1693 - val_accuracy: 0.9287\n", "\n", "Epoch 00020: val_loss did not improve from 0.13571\n" ] } ], "source": [ "hist = gmf.fit(\n", "    dataset=dataset,\n", "    batch_size=batch_size,\n", "    num_epochs=num_epochs,\n", "    path_model_weights='/Users/xyin/Documents/work/projects/rec_utils/metadata/gmf/gmf-weights-improvement-{epoch:02d}-{val_loss:.4f}.hdf5',\n", "    path_csvlog='/Users/xyin/Documents/work/projects/rec_utils/metadata/gmf/gmf_log.csv'\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### double-check the current state of the trained model" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<module 'cf_ec2.evaluation_grouped' from '../cf_ec2/evaluation_grouped.py'>" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import importlib\n", "importlib.reload(evaluation_grouped)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 6040/6040 [02:11<00:00, 46.00it/s]\n" ] } ], "source": [ "evaluator = evaluation_grouped.metricsEval(\n", "    model=gmf.model,\n", "    users=dataset.users,\n", "    items=dataset.items\n", ")\n", "evaluator.getRecs()" ] },
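{ "cell_type": "markdown", "metadata": {}, "source": [ "`getRecs` comes from `evaluation_grouped`; judging by the per-user progress bar, it presumably scores every (user, item) pair and stacks the results into `all_predictions`. A rough equivalent under that assumption (`dataset.users` / `dataset.items` taken to be the unique IDs; `all_predictions_sketch` is a hypothetical name):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# rough equivalent of the assumed getRecs behavior (sketch, not the real code)\n", "from tqdm import tqdm\n", "\n", "items_arr = np.array(dataset.items)\n", "frames = []\n", "for u in tqdm(dataset.users):\n", "    # score all items for this user in one batched predict call\n", "    scores = gmf.model.predict(\n", "        [np.full_like(items_arr, u), items_arr],\n", "        batch_size=len(items_arr)\n", "    ).ravel()\n", "    frames.append(pd.DataFrame(\n", "        {'userID': u, 'itemID': items_arr, 'prediction': scores}))\n", "all_predictions_sketch = pd.concat(frames, ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": {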
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIDitemIDprediction
0000.956932
1010.991923
2020.988913
\n", "
" ], "text/plain": [ " userID itemID prediction\n", "0 0 0 0.956932\n", "1 0 1 0.991923\n", "2 0 2 0.988913" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "evaluator.all_predictions.head(3)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "rmse,auc,logloss = evaluator.getOverlapBasedMetrics(\n", " dataset.users_test,\n", " dataset.items_test,\n", " dataset.ratings_test\n", ")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.22678508081596974, 0.8910193560266216, 0.16930384419089145)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rmse,auc,logloss" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "it proves that the model is still at the state of last epoch !!!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can also do something like this" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "scores = gmf.model.evaluate(\n", " x = [\n", " np.array(dataset.users_test),\n", " np.array(dataset.items_test)\n", " ],\n", " y = np.array(dataset.ratings_test),\n", " verbose=0\n", ")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.16930384472775314, 0.9286995530128479]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scores" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['loss', 'accuracy']" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "gmf.model.metrics_names" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### try to load the parameters from the best model" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "gmf.model.load_weights('../metadata/gmf/gmf-weights-improvement-11-0.1357.hdf5')" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "scores = gmf.model.evaluate(\n", " x = [\n", " np.array(dataset.users_test),\n", " np.array(dataset.items_test)\n", " ],\n", " y = np.array(dataset.ratings_test),\n", " verbose=0\n", ")" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.13571090596280566, 0.9433576464653015]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scores" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 6040/6040 [02:15<00:00, 44.54it/s]\n" ] }, { "data": { "text/plain": [ "(0.20208704972160227, 0.8827614048663103, 0.13571090694790497)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "evaluator = evaluation_grouped.metricsEval(\n", " model=gmf.model,\n", " users=dataset.users,\n", " items=dataset.items\n", ")\n", "evaluator.getRecs()\n", "rmse,auc,logloss = evaluator.getOverlapBasedMetrics(\n", " dataset.users_test,\n", " dataset.items_test,\n", " dataset.ratings_test\n", ")\n", "rmse,auc,logloss" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### try create a new model by loading pre-trained parameters" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "gmf2 = GMF(\n", " 
"    n_users=n_users,\n", "    n_items=n_items,\n", "    n_factors_gmf=n_factors_gmf\n", ")\n", "gmf2.create_model(path_pretrain='../metadata/gmf/gmf-weights-improvement-11-0.1357.hdf5')\n", "gmf2.compile(learning_rate=learning_rate)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.13571090596280566, 0.9433576464653015]" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scores = gmf2.model.evaluate(\n", "    x = [\n", "        np.array(dataset.users_test),\n", "        np.array(dataset.items_test)\n", "    ],\n", "    y = np.array(dataset.ratings_test),\n", "    verbose=0\n", ")\n", "scores" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 6040/6040 [02:18<00:00, 43.61it/s]\n" ] }, { "data": { "text/plain": [ "(0.20208704972160227, 0.8827614048663103, 0.13571090694790497)" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "evaluator = evaluation_grouped.metricsEval(\n", "    model=gmf2.model,\n", "    users=dataset.users,\n", "    items=dataset.items\n", ")\n", "evaluator.getRecs()\n", "rmse,auc,logloss = evaluator.getOverlapBasedMetrics(\n", "    dataset.users_test,\n", "    dataset.items_test,\n", "    dataset.ratings_test\n", ")\n", "rmse,auc,logloss" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Confirmed that both loading approaches produce identical results." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### try to save/load the complete model" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "gmf.model.save('../metadata/gmf/gmf-best.hdf5')" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/xyin/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/indexed_slices.py:433: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.\n", " \"Converting sparse IndexedSlices to a dense Tensor of unknown shape. \"\n" ] } ], "source": [ "model3 = keras.models.load_model('../metadata/gmf/gmf-best.hdf5')" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 6040/6040 [02:17<00:00, 44.01it/s]\n" ] }, { "data": { "text/plain": [ "(0.20208704972160227, 0.8827614048663103, 0.13571090694790497)" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "evaluator = evaluation_grouped.metricsEval(\n", "    model=model3,\n", "    users=dataset.users,\n", "    items=dataset.items\n", ")\n", "evaluator.getRecs()\n", "rmse,auc,logloss = evaluator.getOverlapBasedMetrics(\n", "    dataset.users_test,\n", "    dataset.items_test,\n", "    dataset.ratings_test\n", ")\n", "rmse,auc,logloss" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }