{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Test the NCF modules under folder [cf_ec2](../cf_ec2) with ml-1m dataset, load the best model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "import numpy as np \n",
    "import pandas as pd\n",
    "import keras\n",
    "from keras import Model\n",
    "from keras.regularizers import l2\n",
    "from keras.optimizers import (\n",
    "    Adam,\n",
    "    Adamax,\n",
    "    Adagrad,\n",
    "    SGD,\n",
    "    RMSprop\n",
    ")\n",
    "from keras.layers import (\n",
    "    Embedding, \n",
    "    Input,\n",
    "    Flatten, \n",
    "    Multiply, \n",
    "    Concatenate,\n",
    "    Dense\n",
    ")\n",
    "\n",
    "import sys\n",
    "sys.path.append('../')\n",
    "from cf_ec2 import (\n",
    "    GMF,\n",
    "    MLP,\n",
    "    NCF,\n",
    "    Data,\n",
    "    evaluation,\n",
    "    evaluation2\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## step 1: load the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = pd.read_csv('../data/ml-1m.train.rating',sep='\\t',header=None,names=['user','item','rating','event_ts'])\n",
    "test = pd.read_csv('../data/ml-1m.test.rating',sep='\\t',header=None,names=['user','item','rating','event_ts'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>item</th>\n",
       "      <th>rating</th>\n",
       "      <th>event_ts</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>32</td>\n",
       "      <td>4</td>\n",
       "      <td>978824330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>34</td>\n",
       "      <td>4</td>\n",
       "      <td>978824330</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>978824291</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user  item  rating   event_ts\n",
       "0     0    32       4  978824330\n",
       "1     0    34       4  978824330\n",
       "2     0     4       5  978824291"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user</th>\n",
       "      <th>item</th>\n",
       "      <th>rating</th>\n",
       "      <th>event_ts</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>25</td>\n",
       "      <td>5</td>\n",
       "      <td>978824351</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>133</td>\n",
       "      <td>3</td>\n",
       "      <td>978300174</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>207</td>\n",
       "      <td>4</td>\n",
       "      <td>978298504</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   user  item  rating   event_ts\n",
       "0     0    25       5  978824351\n",
       "1     1   133       3  978300174\n",
       "2     2   207       4  978298504"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(6040, (6040, 4))"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test.user.nunique(), test.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## step 2: prepare the data for ncf model training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset = Data(\n",
    "    train=train,\n",
    "    test=test,\n",
    "    col_user='user',\n",
    "    col_item='item',\n",
    "    col_rating='rating',\n",
    "    col_time='event_ts',\n",
    "    binary=True,\n",
    "    n_neg=4,\n",
    "    n_neg_test=100\n",
    ")\n",
    "dataset.prepTrainDNN()\n",
    "dataset.prepTestDNN(group=True)\n",
    "dataset.negativeSampling()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### prepare the test dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "newItems = set(dataset.items_test)-set(dataset.items)\n",
    "idx2del = []\n",
    "for idx,item in enumerate(dataset.items_test):\n",
    "    if item in newItems:\n",
    "        idx2del.append(idx)\n",
    "\n",
    "length_test_original = len(dataset.users_test)\n",
    "dataset.users_test = [\n",
    "    dataset.users_test[idx]\n",
    "    for idx in range(length_test_original) if idx not in idx2del\n",
    "]\n",
    "dataset.items_test = [\n",
    "    dataset.items_test[idx]\n",
    "    for idx in range(length_test_original) if idx not in idx2del\n",
    "]\n",
    "dataset.ratings_test = [\n",
    "    dataset.ratings_test[idx]\n",
    "    for idx in range(length_test_original) if idx not in idx2del\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## step 3: create the model architecture"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "n_users = 6040\n",
    "n_items = 3704\n",
    "n_factors_gmf = 32\n",
    "layers_mlp = [64,32,16,8]\n",
    "reg_gmf = 0.\n",
    "reg_layers_mlp = [0.,0.,0.,0.]\n",
    "learning_rate = 0.01\n",
    "flg_pretrain = ''\n",
    "filepath = ''\n",
    "filepath_gmf_pretrain = ''\n",
    "filepath_mlp_pretrain = ''\n",
    "num_epochs = 20\n",
    "batch_size = 100\n",
    "\n",
    "ncf = NCF(\n",
    "    n_users=n_users,\n",
    "    n_items=n_items,\n",
    "    n_factors_gmf=n_factors_gmf,\n",
    "    layers_mlp=layers_mlp,\n",
    "    reg_gmf=reg_gmf,\n",
    "    reg_layers_mlp=reg_layers_mlp\n",
    ")\n",
    "model = ncf.create_model()\n",
    "model.load_weights('../metadata/ncf/ncf_model_best')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "#### compile the model\n",
    "model.compile(\n",
    "    optimizer=Adam(lr=learning_rate),\n",
    "    loss='binary_crossentropy',\n",
    "    metrics=['accuracy']\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# estimate accuracy on whole dataset using loaded weights\n",
    "scores = model.evaluate(\n",
    "    x = [\n",
    "        np.array(dataset.users_test),\n",
    "        np.array(dataset.items_test)\n",
    "    ],\n",
    "    y = np.array(dataset.ratings_test),\n",
    "    verbose=0\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.1269758473972751, 0.948145866394043]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['loss', 'accuracy']"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.metrics_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}