{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Test the NCF modules under folder [cf_ec2](../cf_ec2) with ml-1m dataset, load the best model" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using TensorFlow backend.\n" ] } ], "source": [ "import numpy as np \n", "import pandas as pd\n", "import keras\n", "from keras import Model\n", "from keras.regularizers import l2\n", "from keras.optimizers import (\n", " Adam,\n", " Adamax,\n", " Adagrad,\n", " SGD,\n", " RMSprop\n", ")\n", "from keras.layers import (\n", " Embedding, \n", " Input,\n", " Flatten, \n", " Multiply, \n", " Concatenate,\n", " Dense\n", ")\n", "\n", "import sys\n", "sys.path.append('../')\n", "from cf_ec2 import (\n", " GMF,\n", " MLP,\n", " NCF,\n", " Data,\n", " evaluation,\n", " evaluation2\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## step 1: load the data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Both rating files are headerless, tab-separated and share one column layout.\n", "rating_columns = ['user', 'item', 'rating', 'event_ts']\n", "train, test = (\n", "    pd.read_csv(path, sep='\\t', header=None, names=rating_columns)\n", "    for path in ('../data/ml-1m.train.rating', '../data/ml-1m.test.rating')\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useritemratingevent_ts
00324978824330
10344978824330
2045978824291
\n", "
" ], "text/plain": [ " user item rating event_ts\n", "0 0 32 4 978824330\n", "1 0 34 4 978824330\n", "2 0 4 5 978824291" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.head(3)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
useritemratingevent_ts
00255978824351
111333978300174
222074978298504
\n", 
" ], "text/plain": [ " user item rating event_ts\n", "0 0 25 5 978824351\n", "1 1 133 3 978300174\n", "2 2 207 4 978298504" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.head(3)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(6040, (6040, 4))" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.user.nunique(), test.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## step 2: prepare the data for ncf model training" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "dataset = Data(\n", " train=train,\n", " test=test,\n", " col_user='user',\n", " col_item='item',\n", " col_rating='rating',\n", " col_time='event_ts',\n", " binary=True,\n", " n_neg=4,\n", " n_neg_test=100\n", ")\n", "dataset.prepTrainDNN()\n", "dataset.prepTestDNN(group=True)\n", "dataset.negativeSampling()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### prepare the test dataset" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Drop every test row whose item appears in dataset.items_test but not in dataset.items.\n", "# NOTE(review): dataset.items is presumably the set of items seen in training - confirm against Data.\n", "newItems = set(dataset.items_test) - set(dataset.items)\n", "# A set gives O(1) membership tests; the former list was rescanned for every index\n", "# in each of the three filters below.\n", "idx2del = {\n", "    idx for idx, item in enumerate(dataset.items_test) if item in newItems\n", "}\n", "\n", "length_test_original = len(dataset.users_test)\n", "# Compute the surviving positions once and reuse them for the three parallel lists.\n", "idx2keep = [idx for idx in range(length_test_original) if idx not in idx2del]\n", "dataset.users_test = [dataset.users_test[idx] for idx in idx2keep]\n", "dataset.items_test = [dataset.items_test[idx] for idx in idx2keep]\n", "dataset.ratings_test = [dataset.ratings_test[idx] for idx in idx2keep]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## step 3: create the model architecture" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { 
"scrolled": false }, "outputs": [], "source": [ "# Architecture settings passed to NCF before the saved weights are loaded.\n", "# NOTE(review): these presumably have to match the configuration the checkpoint was trained with - confirm.\n", "n_users = 6040\n", "n_items = 3704\n", "n_factors_gmf = 32\n", "layers_mlp = [64,32,16,8]\n", "reg_gmf = 0.\n", "reg_layers_mlp = [0.,0.,0.,0.]\n", "# learning_rate is only consumed by the optimizer in the compile cell below.\n", "learning_rate = 0.01\n", "\n", "ncf = NCF(\n", " n_users=n_users,\n", " n_items=n_items,\n", " n_factors_gmf=n_factors_gmf,\n", " layers_mlp=layers_mlp,\n", " reg_gmf=reg_gmf,\n", " reg_layers_mlp=reg_layers_mlp\n", ")\n", "model = ncf.create_model()\n", "model.load_weights('../metadata/ncf/ncf_model_best')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "#### compile the model\n", "model.compile(\n", " optimizer=Adam(lr=learning_rate),\n", " loss='binary_crossentropy',\n", " metrics=['accuracy']\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# evaluate loss and accuracy on the filtered test arrays using the loaded weights\n", "# NOTE(review): the test data was built with n_neg_test=100, so labels are presumably dominated by\n", "# negatives and accuracy alone is a weak signal - confirm, and consider a ranking metric as well.\n", "scores = model.evaluate(\n", " x = [\n", " np.array(dataset.users_test),\n", " np.array(dataset.items_test)\n", " ],\n", " y = np.array(dataset.ratings_test),\n", " verbose=0\n", ")" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.1269758473972751, 0.948145866394043]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scores" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['loss', 'accuracy']" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.metrics_names" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": 
"ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }