{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Test the NCF modules in the [cf_ec2](../cf_ec2) folder on the ml-1m dataset, loading the best saved model"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"import numpy as np \n",
"import pandas as pd\n",
"import keras\n",
"from keras import Model\n",
"from keras.regularizers import l2\n",
"from keras.optimizers import (\n",
" Adam,\n",
" Adamax,\n",
" Adagrad,\n",
" SGD,\n",
" RMSprop\n",
")\n",
"from keras.layers import (\n",
" Embedding, \n",
" Input,\n",
" Flatten, \n",
" Multiply, \n",
" Concatenate,\n",
" Dense\n",
")\n",
"\n",
"import sys\n",
"sys.path.append('../')\n",
"from cf_ec2 import (\n",
" GMF,\n",
" MLP,\n",
" NCF,\n",
" Data,\n",
" evaluation,\n",
" evaluation2\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## step 1: load the data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Both rating files are read as tab-separated text with no header row,\n",
"# so the four column names are supplied explicitly.\n",
"train = pd.read_csv(\n",
"    '../data/ml-1m.train.rating',\n",
"    sep='\\t',\n",
"    header=None,\n",
"    names=['user', 'item', 'rating', 'event_ts'],\n",
")\n",
"test = pd.read_csv(\n",
"    '../data/ml-1m.test.rating',\n",
"    sep='\\t',\n",
"    header=None,\n",
"    names=['user', 'item', 'rating', 'event_ts'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user | \n",
" item | \n",
" rating | \n",
" event_ts | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" 32 | \n",
" 4 | \n",
" 978824330 | \n",
"
\n",
" \n",
" | 1 | \n",
" 0 | \n",
" 34 | \n",
" 4 | \n",
" 978824330 | \n",
"
\n",
" \n",
" | 2 | \n",
" 0 | \n",
" 4 | \n",
" 5 | \n",
" 978824291 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user item rating event_ts\n",
"0 0 32 4 978824330\n",
"1 0 34 4 978824330\n",
"2 0 4 5 978824291"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" user | \n",
" item | \n",
" rating | \n",
" event_ts | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" 25 | \n",
" 5 | \n",
" 978824351 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" 133 | \n",
" 3 | \n",
" 978300174 | \n",
"
\n",
" \n",
" | 2 | \n",
" 2 | \n",
" 207 | \n",
" 4 | \n",
" 978298504 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" user item rating event_ts\n",
"0 0 25 5 978824351\n",
"1 1 133 3 978300174\n",
"2 2 207 4 978298504"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(6040, (6040, 4))"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test.user.nunique(), test.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## step 2: prepare the data for ncf model training"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Wrap the two splits in the project's Data helper and build the model inputs.\n",
"# NOTE(review): n_neg / n_neg_test presumably set how many negatives are sampled\n",
"# per positive for train / test; confirm against cf_ec2.Data.\n",
"dataset = Data(\n",
"    # input frames\n",
"    train=train,\n",
"    test=test,\n",
"    # column mapping\n",
"    col_user='user',\n",
"    col_item='item',\n",
"    col_rating='rating',\n",
"    col_time='event_ts',\n",
"    # label / sampling settings\n",
"    binary=True,\n",
"    n_neg=4,\n",
"    n_neg_test=100,\n",
")\n",
"\n",
"# The call order below is kept exactly as it was.\n",
"# NOTE(review): no random seed is set in this notebook; if negativeSampling draws\n",
"# random items, the sampled negatives may differ between runs; confirm.\n",
"dataset.prepTrainDNN()\n",
"dataset.prepTestDNN(group=True)\n",
"dataset.negativeSampling()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### prepare the test dataset: drop test rows whose item does not appear in the training items"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Drop every test row whose item never occurs in the training items.\n",
"# NOTE(review): presumably the trained model has no embedding for such items; confirm.\n",
"newItems = set(dataset.items_test) - set(dataset.items)\n",
"\n",
"# Kept as a set so the membership tests below are constant-time\n",
"# rather than a scan of the whole collection for every test row.\n",
"idx2del = {\n",
"    idx\n",
"    for idx, item in enumerate(dataset.items_test)\n",
"    if item in newItems\n",
"}\n",
"\n",
"# Compute the surviving positions once and reuse them, so the three\n",
"# sequences are filtered with exactly the same index selection.\n",
"length_test_original = len(dataset.users_test)\n",
"idx2keep = [idx for idx in range(length_test_original) if idx not in idx2del]\n",
"\n",
"dataset.users_test = [dataset.users_test[idx] for idx in idx2keep]\n",
"dataset.items_test = [dataset.items_test[idx] for idx in idx2keep]\n",
"dataset.ratings_test = [dataset.ratings_test[idx] for idx in idx2keep]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## step 3: create the model architecture, load the best weights, and evaluate on the test set"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# Architecture settings passed to NCF below.\n",
"# NOTE(review): the sizes are hard-coded; presumably they must match the model\n",
"# that produced the saved weights; confirm before changing them.\n",
"n_users = 6040\n",
"n_items = 3704\n",
"n_factors_gmf = 32\n",
"layers_mlp = [64, 32, 16, 8]\n",
"reg_gmf = 0.0\n",
"reg_layers_mlp = [0.0, 0.0, 0.0, 0.0]\n",
"\n",
"# Used when the model is compiled in a later cell.\n",
"learning_rate = 0.01\n",
"\n",
"# Not referenced by any other cell in this notebook.\n",
"# NOTE(review): presumably carried over from the training notebook; confirm.\n",
"flg_pretrain = ''\n",
"filepath = ''\n",
"filepath_gmf_pretrain = ''\n",
"filepath_mlp_pretrain = ''\n",
"num_epochs = 20\n",
"batch_size = 100\n",
"\n",
"# Build the network, then restore the saved weights into it.\n",
"ncf = NCF(\n",
"    n_users=n_users,\n",
"    n_items=n_items,\n",
"    n_factors_gmf=n_factors_gmf,\n",
"    layers_mlp=layers_mlp,\n",
"    reg_gmf=reg_gmf,\n",
"    reg_layers_mlp=reg_layers_mlp,\n",
")\n",
"model = ncf.create_model()\n",
"model.load_weights('../metadata/ncf/ncf_model_best')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Compiling attaches the loss and the metric that model.evaluate reports;\n",
"# no training step is run in this notebook.\n",
"model.compile(\n",
"    loss='binary_crossentropy',\n",
"    metrics=['accuracy'],\n",
"    optimizer=Adam(lr=learning_rate),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Score the filtered test data with the loaded weights.\n",
"# The returned values follow the order of model.metrics_names.\n",
"# NOTE(review): if the test data holds many sampled negatives per positive\n",
"# (n_neg_test=100), plain accuracy is dominated by the negatives; confirm and\n",
"# consider a ranking metric as well.\n",
"scores = model.evaluate(\n",
"    x=[np.array(dataset.users_test), np.array(dataset.items_test)],\n",
"    y=np.array(dataset.ratings_test),\n",
"    verbose=0,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0.1269758473972751, 0.948145866394043]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scores"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['loss', 'accuracy']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.metrics_names"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}