{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from fastai.collab import * # Quick access to collab filtering functionality"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Collaborative filtering example"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`collab` models use data in a `DataFrame` of users, items, and ratings."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('/home/ubuntu/.fastai/data/movie_lens_sample')"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"path = untar_data(URLs.ML_SAMPLE)\n",
"path"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
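{
"data": {
"text/html": [
"<div>\n",
"\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>userId</th>\n",
"      <th>movieId</th>\n",
"      <th>rating</th>\n",
"      <th>timestamp</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>73</td>\n",
"      <td>1097</td>\n",
"      <td>4.0</td>\n",
"      <td>1255504951</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>561</td>\n",
"      <td>924</td>\n",
"      <td>3.5</td>\n",
"      <td>1172695223</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>157</td>\n",
"      <td>260</td>\n",
"      <td>3.5</td>\n",
"      <td>1291598691</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>358</td>\n",
"      <td>1210</td>\n",
"      <td>5.0</td>\n",
"      <td>957481884</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>130</td>\n",
"      <td>316</td>\n",
"      <td>2.0</td>\n",
"      <td>1138999234</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],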
"text/plain": [
" userId movieId rating timestamp\n",
"0 73 1097 4.0 1255504951\n",
"1 561 924 3.5 1172695223\n",
"2 157 260 3.5 1291598691\n",
"3 358 1210 5.0 957481884\n",
"4 130 316 2.0 1138999234"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ratings = pd.read_csv(path/'ratings.csv')\n",
"series2cat(ratings, 'userId', 'movieId')\n",
"ratings.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = CollabDataBunch.from_df(ratings, seed=42)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y_range = [0, 5.5]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"That's all we need to create and train a model:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
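{
"data": {
"text/html": [
"Total time: 00:02 <p><table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: left;\">\n",
"      <th>epoch</th>\n",
"      <th>train_loss</th>\n",
"      <th>valid_loss</th>\n",
"      <th>time</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <td>0</td>\n",
"      <td>1.779086</td>\n",
"      <td>1.265616</td>\n",
"      <td>00:00</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>1</td>\n",
"      <td>0.909457</td>\n",
"      <td>0.685351</td>\n",
"      <td>00:00</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>2</td>\n",
"      <td>0.684818</td>\n",
"      <td>0.666282</td>\n",
"      <td>00:00</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <td>3</td>\n",
"      <td>0.593794</td>\n",
"      <td>0.659886</td>\n",
"      <td>00:00</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},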
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"learn = collab_learner(data, n_factors=50, y_range=y_range)\n",
"learn.fit_one_cycle(4, 5e-3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(FloatItem 4.166797, tensor(4.1668), tensor(4.1668))"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"learn.predict(ratings.iloc[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}