{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from fastai.collab import * # Quick access to collab filtering functionality" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Collaborative filtering example" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "`collab` models use data in a `DataFrame` of users, items, and ratings." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PosixPath('/home/ubuntu/.fastai/data/movie_lens_sample')" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path = untar_data(URLs.ML_SAMPLE)\n", "path" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
userIdmovieIdratingtimestamp
07310974.01255504951
15619243.51172695223
21572603.51291598691
335812105.0957481884
41303162.01138999234
\n", "
" ], "text/plain": [ " userId movieId rating timestamp\n", "0 73 1097 4.0 1255504951\n", "1 561 924 3.5 1172695223\n", "2 157 260 3.5 1291598691\n", "3 358 1210 5.0 957481884\n", "4 130 316 2.0 1138999234" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ratings = pd.read_csv(path/'ratings.csv')\n", "series2cat(ratings, 'userId', 'movieId')\n", "ratings.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = CollabDataBunch.from_df(ratings, seed=42)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_range = [0, 5.5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "That's all we need to create and train a model:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "Total time: 00:02

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
epochtrain_lossvalid_losstime
01.7790861.26561600:00
10.9094570.68535100:00
20.6848180.66628200:00
30.5937940.65988600:00
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "learn = collab_learner(data, n_factors=50, y_range=y_range)\n", "learn.fit_one_cycle(4, 5e-3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(FloatItem 4.166797, tensor(4.1668), tensor(4.1668))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "learn.predict(ratings.iloc[0])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }