{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "***\n", "***\n", "# 使用Turicreate进行电影推荐\n", "***\n", "***\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-06-15T07:01:17.612752Z", "start_time": "2019-06-15T07:01:16.946694Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "import os\n", "\n", "import turicreate as tc\n", "# set canvas to show sframes and sgraphs in ipython notebook\n", "# import matplotlib.pyplot as plt\n", "# %matplotlib inline" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "# download data from: http://files.grouplens.org/datasets/movielens/ml-1m.zip" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-06-15T07:01:45.368349Z", "start_time": "2019-06-15T07:01:35.370047Z" }, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/ratings.dat
" ], "text/plain": [ "Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/ratings.dat" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Parsing completed. Parsed 100 lines in 0.281192 secs.
" ], "text/plain": [ "Parsing completed. Parsed 100 lines in 0.281192 secs." ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------------------\n", "Inferred types from first 100 line(s) of file as \n", "column_type_hints=[str]\n", "If parsing fails due to incorrect types, you can correct\n", "the inferred type list above and pass it to read_csv in\n", "the column_type_hints argument\n", "------------------------------------------------------\n" ] }, { "data": { "text/html": [ "
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/ratings.dat
" ], "text/plain": [ "Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/ratings.dat" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Parsing completed. Parsed 1000209 lines in 0.372092 secs.
" ], "text/plain": [ "Parsing completed. Parsed 1000209 lines in 0.372092 secs." ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Load the MovieLens-1M ratings; every record is 'UserID::MovieID::Rating::Timestamp'.\n", "# The dataset directory can be overridden through the ML1M_DIR environment variable;\n", "# the default keeps the original local path so existing setups run unchanged.\n", "ml1m_dir = os.environ.get('ML1M_DIR', '/Users/datalab/bigdata/cjc/ml-1m')\n", "ratings_path = os.path.join(ml1m_dir, 'ratings.dat')\n", "\n", "# Read each line as one string column (X1), then split it on '::' and unpack into X.0 .. X.3.\n", "data = tc.SFrame.read_csv(ratings_path, delimiter='\\n', header=False)['X1'].apply(lambda x: x.split('::')).unpack()\n", "# All four fields are numeric, so cast every unpacked string column to int.\n", "for col in data.column_names():\n", "    data[col] = data[col].astype(int)\n", "data = data.rename({'X.0': 'user_id', 'X.1': 'movie_id', 'X.2': 'rating', 'X.3': 'timestamp'})\n", "#data.save('ratings')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-06-15T07:02:14.283554Z", "start_time": "2019-06-15T07:02:14.143619Z" }, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/users.dat
" ], "text/plain": [ "Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/users.dat" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Parsing completed. Parsed 100 lines in 0.028041 secs.
" ], "text/plain": [ "Parsing completed. Parsed 100 lines in 0.028041 secs." ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "------------------------------------------------------\n", "Inferred types from first 100 line(s) of file as \n", "column_type_hints=[str]\n", "If parsing fails due to incorrect types, you can correct\n", "the inferred type list above and pass it to read_csv in\n", "the column_type_hints argument\n", "------------------------------------------------------\n" ] }, { "data": { "text/html": [ "
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/users.dat
" ], "text/plain": [ "Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/users.dat" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Parsing completed. Parsed 6040 lines in 0.007235 secs.
" ], "text/plain": [ "Parsing completed. Parsed 6040 lines in 0.007235 secs." ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Load the MovieLens-1M user table; every record is 'UserID::Gender::Age::Occupation::Zip-code'.\n", "# The dataset directory can be overridden through the ML1M_DIR environment variable;\n", "# the default keeps the original local path so existing setups run unchanged.\n", "ml1m_dir = os.environ.get('ML1M_DIR', '/Users/datalab/bigdata/cjc/ml-1m')\n", "users_path = os.path.join(ml1m_dir, 'users.dat')\n", "\n", "# Read each line as one string column (X1), then split it on '::' and unpack into X.0 .. X.4.\n", "users = tc.SFrame.read_csv(users_path, delimiter='\\n', header=False)['X1'].apply(lambda x: x.split('::')).unpack()\n", "users = users.rename({'X.0': 'user_id', 'X.1': 'gender', 'X.2': 'age', 'X.3': 'occupation', 'X.4': 'zip-code'})\n", "# Only user_id is cast to int; the other columns stay strings (zip codes such as '02460' keep their leading zero).\n", "users['user_id'] = users['user_id'].astype(int)\n", "# Side effect: persists the SFrame under the relative name 'users' in the working directory.\n", "users.save('users')\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:22:48.875374Z", "start_time": "2019-06-14T16:22:48.872581Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "#items = tc.SFrame.read_csv('/Users/datalab/bigdata/ml-1m/movies.dat', delimiter='\\n', header=False)#['X1'].apply(lambda x: x.split('::')).unpack()\n", "# items = items.rename({'X.0': 'movie_id', 'X.1': 'title', 'X.2': 'genre'})\n", "# items['movie_id'] = items['movie_id'].astype(int)\n", "# items.save('items')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-06-15T07:02:25.813647Z", "start_time": "2019-06-15T07:02:25.785898Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idmovie_idratingtimestamp
111935978300760
16613978302109
19143978301968
134084978300275
123555978824291
111973978302268
112875978302039
128045978300719
15944978302268
19194978301368
\n", "[1000209 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tuser_id\tint\n", "\tmovie_id\tint\n", "\trating\tint\n", "\ttimestamp\tint\n", "\n", "Rows: 1000209\n", "\n", "Data:\n", "+---------+----------+--------+-----------+\n", "| user_id | movie_id | rating | timestamp |\n", "+---------+----------+--------+-----------+\n", "| 1 | 1193 | 5 | 978300760 |\n", "| 1 | 661 | 3 | 978302109 |\n", "| 1 | 914 | 3 | 978301968 |\n", "| 1 | 3408 | 4 | 978300275 |\n", "| 1 | 2355 | 5 | 978824291 |\n", "| 1 | 1197 | 3 | 978302268 |\n", "| 1 | 1287 | 5 | 978302039 |\n", "| 1 | 2804 | 5 | 978300719 |\n", "| 1 | 594 | 4 | 978302268 |\n", "| 1 | 919 | 4 | 978301368 |\n", "+---------+----------+--------+-----------+\n", "[1000209 rows x 4 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:22:57.906209Z", "start_time": "2019-06-14T16:22:57.903728Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "#items" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2019-06-15T07:02:48.315958Z", "start_time": "2019-06-15T07:02:48.294741Z" }, "slideshow": { "slide_type": "subslide" } }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idgenderageoccupationzip-code
1F11048067
2M561670072
3M251555117
4M45702460
5M252055455
6F50955117
7M35106810
8M251211413
9M251761614
10F35195370
\n", "[6040 rows x 5 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tuser_id\tint\n", "\tgender\tstr\n", "\tage\tstr\n", "\toccupation\tstr\n", "\tzip-code\tstr\n", "\n", "Rows: 6040\n", "\n", "Data:\n", "+---------+--------+-----+------------+----------+\n", "| user_id | gender | age | occupation | zip-code |\n", "+---------+--------+-----+------------+----------+\n", "| 1 | F | 1 | 10 | 48067 |\n", "| 2 | M | 56 | 16 | 70072 |\n", "| 3 | M | 25 | 15 | 55117 |\n", "| 4 | M | 45 | 7 | 02460 |\n", "| 5 | M | 25 | 20 | 55455 |\n", "| 6 | F | 50 | 9 | 55117 |\n", "| 7 | M | 35 | 1 | 06810 |\n", "| 8 | M | 25 | 12 | 11413 |\n", "| 9 | M | 25 | 17 | 61614 |\n", "| 10 | F | 35 | 1 | 95370 |\n", "+---------+--------+-----+------------+----------+\n", "[6040 rows x 5 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "users" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:33:14.874154Z", "start_time": "2018-05-05T06:33:13.913325Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "#data = data.join(items, on='movie_id')" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:23:16.893160Z", "start_time": "2019-06-14T16:23:16.890595Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "#data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2019-06-15T07:03:02.788476Z", "start_time": "2019-06-15T07:03:02.784988Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "# Hold out roughly 5% of the ratings for evaluation; the fixed seed keeps the split repeatable.\n", "train_set, test_set = data.random_split(fraction=0.95, seed=1)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2019-06-15T07:03:33.095594Z", "start_time": "2019-06-15T07:03:12.909391Z" }, "slideshow": { 
"slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
Preparing data set.
" ], "text/plain": [ "Preparing data set." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    Data has 949852 observations with 6040 users and 3701 items.
" ], "text/plain": [ " Data has 949852 observations with 6040 users and 3701 items." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    Data prepared in: 0.550091s
" ], "text/plain": [ " Data prepared in: 0.550091s" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Training ranking_factorization_recommender for recommendations.
" ], "text/plain": [ "Training ranking_factorization_recommender for recommendations." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+--------------------------------------------------+----------+
" ], "text/plain": [ "+--------------------------------+--------------------------------------------------+----------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Parameter                      | Description                                      | Value    |
" ], "text/plain": [ "| Parameter | Description | Value |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+--------------------------------------------------+----------+
" ], "text/plain": [ "+--------------------------------+--------------------------------------------------+----------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| num_factors                    | Factor Dimension                                 | 32       |
" ], "text/plain": [ "| num_factors | Factor Dimension | 32 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| regularization                 | L2 Regularization on Factors                     | 1e-09    |
" ], "text/plain": [ "| regularization | L2 Regularization on Factors | 1e-09 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| solver                         | Solver used for training                         | adagrad  |
" ], "text/plain": [ "| solver | Solver used for training | adagrad |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| linear_regularization          | L2 Regularization on Linear Coefficients         | 1e-09    |
" ], "text/plain": [ "| linear_regularization | L2 Regularization on Linear Coefficients | 1e-09 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| ranking_regularization         | Rank-based Regularization Weight                 | 0.25     |
" ], "text/plain": [ "| ranking_regularization | Rank-based Regularization Weight | 0.25 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| max_iterations                 | Maximum Number of Iterations                     | 25       |
" ], "text/plain": [ "| max_iterations | Maximum Number of Iterations | 25 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+--------------------------------------------------+----------+
" ], "text/plain": [ "+--------------------------------+--------------------------------------------------+----------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
  Optimizing model using SGD; tuning step size.
" ], "text/plain": [ " Optimizing model using SGD; tuning step size." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
  Using 118731 / 949852 points for tuning the step size.
" ], "text/plain": [ " Using 118731 / 949852 points for tuning the step size." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+
" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Attempt | Initial Step Size | Estimated Objective Value                |
" ], "text/plain": [ "| Attempt | Initial Step Size | Estimated Objective Value |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+
" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 0       | 16.6667           | Not Viable                               |
" ], "text/plain": [ "| 0 | 16.6667 | Not Viable |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 1       | 4.16667           | Not Viable                               |
" ], "text/plain": [ "| 1 | 4.16667 | Not Viable |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 2       | 1.04167           | Not Viable                               |
" ], "text/plain": [ "| 2 | 1.04167 | Not Viable |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 3       | 0.260417          | Not Viable                               |
" ], "text/plain": [ "| 3 | 0.260417 | Not Viable |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 4       | 0.0651042         | 1.8722                                   |
" ], "text/plain": [ "| 4 | 0.0651042 | 1.8722 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 5       | 0.0325521         | 1.94425                                  |
" ], "text/plain": [ "| 5 | 0.0325521 | 1.94425 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 6       | 0.016276          | 1.95877                                  |
" ], "text/plain": [ "| 6 | 0.016276 | 1.95877 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 7       | 0.00813802        | 2.0441                                   |
" ], "text/plain": [ "| 7 | 0.00813802 | 2.0441 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+
" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Final   | 0.0651042         | 1.8722                                   |
" ], "text/plain": [ "| Final | 0.0651042 | 1.8722 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+
" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Starting Optimization.
" ], "text/plain": [ "Starting Optimization." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+
" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Iter.   | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size   |
" ], "text/plain": [ "| Iter. | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+
" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Initial | 110us        | 2.44718           | 1.1172                |             |
" ], "text/plain": [ "| Initial | 110us | 2.44718 | 1.1172 | |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+
" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 1       | 536.251ms    | 2.09737           | 1.13925               | 0.0651042   |
" ], "text/plain": [ "| 1 | 536.251ms | 2.09737 | 1.13925 | 0.0651042 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 2       | 1.05s        | 1.85594           | 1.06079               | 0.0651042   |
" ], "text/plain": [ "| 2 | 1.05s | 1.85594 | 1.06079 | 0.0651042 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 3       | 1.55s        | 1.79883           | 1.03161               | 0.0651042   |
" ], "text/plain": [ "| 3 | 1.55s | 1.79883 | 1.03161 | 0.0651042 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 4       | 2.06s        | 1.77231           | 1.02676               | 0.0651042   |
" ], "text/plain": [ "| 4 | 2.06s | 1.77231 | 1.02676 | 0.0651042 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 5       | 2.57s        | 1.75455           | 1.02264               | 0.0651042   |
" ], "text/plain": [ "| 5 | 2.57s | 1.75455 | 1.02264 | 0.0651042 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 10      | 5.81s        | 1.66968           | 0.995516              | 0.0651042   |
" ], "text/plain": [ "| 10 | 5.81s | 1.66968 | 0.995516 | 0.0651042 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 20      | 12.34s       | 1.58039           | 0.969493              | 0.0651042   |
" ], "text/plain": [ "| 20 | 12.34s | 1.58039 | 0.969493 | 0.0651042 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 25      | 15.69s       | 1.54869           | 0.961055              | 0.0651042   |
" ], "text/plain": [ "| 25 | 15.69s | 1.54869 | 0.961055 | 0.0651042 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+
" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Optimization Complete: Maximum number of passes through the data reached.
" ], "text/plain": [ "Optimization Complete: Maximum number of passes through the data reached." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Computing final objective value and training RMSE.
" ], "text/plain": [ "Computing final objective value and training RMSE." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
       Final objective value: 1.57752
" ], "text/plain": [ " Final objective value: 1.57752" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
       Final training RMSE: 0.95536
" ], "text/plain": [ " Final training RMSE: 0.95536" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Let Turi Create pick the model type; the training log shows a ranking_factorization_recommender.\n", "# NOTE(review): train_set still carries the timestamp column, which is presumably the one\n", "# 'additional observation feature' listed in the recorded model summary - confirm this is intended.\n", "m = tc.recommender.create(train_set, user_id='user_id', item_id='movie_id', target='rating')" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:23:51.502615Z", "start_time": "2019-06-14T16:23:51.482326Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/plain": [ "Class : RankingFactorizationRecommender\n", "\n", "Schema\n", "------\n", "User ID : user_id\n", "Item ID : movie_id\n", "Target : rating\n", "Additional observation features : 1\n", "User side features : []\n", "Item side features : []\n", "\n", "Statistics\n", "----------\n", "Number of observations : 949852\n", "Number of users : 6040\n", "Number of items : 3701\n", "\n", "Training summary\n", "----------------\n", "Training time : 21.9973\n", "\n", "Model Parameters\n", "----------------\n", "Model class : RankingFactorizationRecommender\n", "num_factors : 32\n", "binary_target : 0\n", "side_data_factorization : 1\n", "solver : auto\n", "nmf : 0\n", "max_iterations : 25\n", "\n", "Regularization Settings\n", "-----------------------\n", "regularization : 0.0\n", "regularization_type : normal\n", "linear_regularization : 0.0\n", "ranking_regularization : 0.25\n", "unobserved_rating_value : -1.7976931348623157e+308\n", "num_sampled_negative_examples : 4\n", "ials_confidence_scaling_type : auto\n", "ials_confidence_scaling_factor : 1\n", "\n", "Optimization Settings\n", "---------------------\n", "init_random_sigma : 0.01\n", "sgd_convergence_interval : 4\n", "sgd_convergence_threshold : 0.0\n", "sgd_max_trial_iterations : 5\n", "sgd_sampling_block_size : 131072\n", "sgd_step_adjustment_interval : 4\n", "sgd_step_size : 0.0\n", "sgd_trial_sample_minimum_size : 10000\n", "sgd_trial_sample_proportion : 0.125\n", "step_size_decrease_rate : 0.75\n", "additional_iterations_if_unhealthy : 5\n", "adagrad_momentum_weighting : 
0.9\n", "num_tempering_iterations : 4\n", "tempering_regularization_start_value : 0.0\n", "track_exact_loss : 0" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2019-06-15T07:04:21.017511Z", "start_time": "2019-06-15T07:04:17.764265Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
Warning: Ignoring columns timestamp;
" ], "text/plain": [ "Warning: Ignoring columns timestamp;" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    To use these columns in scoring predictions, use a model that allows the use of additional features.
" ], "text/plain": [ " To use these columns in scoring predictions, use a model that allows the use of additional features." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Preparing data set.
" ], "text/plain": [ "Preparing data set." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    Data has 949852 observations with 6040 users and 3701 items.
" ], "text/plain": [ " Data has 949852 observations with 6040 users and 3701 items." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    Data prepared in: 0.426101s
" ], "text/plain": [ " Data prepared in: 0.426101s" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Training model from provided data.
" ], "text/plain": [ "Training model from provided data." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Gathering per-item and per-user statistics.
" ], "text/plain": [ "Gathering per-item and per-user statistics." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+------------+
" ], "text/plain": [ "+--------------------------------+------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Elapsed Time (Item Statistics) | % Complete |
" ], "text/plain": [ "| Elapsed Time (Item Statistics) | % Complete |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+------------+
" ], "text/plain": [ "+--------------------------------+------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 27.234ms                       | 16.5       |
" ], "text/plain": [ "| 27.234ms | 16.5 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 42.954ms                       | 100        |
" ], "text/plain": [ "| 42.954ms | 100 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+------------+
" ], "text/plain": [ "+--------------------------------+------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Setting up lookup tables.
" ], "text/plain": [ "Setting up lookup tables." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Processing data in one pass using dense lookup tables.
" ], "text/plain": [ "Processing data in one pass using dense lookup tables." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+-------------------------------------+------------------+-----------------+
" ], "text/plain": [ "+-------------------------------------+------------------+-----------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |
" ], "text/plain": [ "| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+-------------------------------------+------------------+-----------------+
" ], "text/plain": [ "+-------------------------------------+------------------+-----------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 73.627ms                            | 0                | 2               |
" ], "text/plain": [ "| 73.627ms | 0 | 2 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 2.79s                               | 100              | 3701            |
" ], "text/plain": [ "| 2.79s | 100 | 3701 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+-------------------------------------+------------------+-----------------+
" ], "text/plain": [ "+-------------------------------------+------------------+-----------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Finalizing lookup tables.
" ], "text/plain": [ "Finalizing lookup tables." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Generating candidate set for working with new users.
" ], "text/plain": [ "Generating candidate set for working with new users." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Finished training in 2.82252s
" ], "text/plain": [ "Finished training in 2.82252s" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Item-item collaborative filtering with Pearson similarity on the ratings.\n", "# The training log warns that the extra timestamp column is ignored by this model.\n", "m2 = tc.item_similarity_recommender.create(\n", "    train_set,\n", "    user_id='user_id',\n", "    item_id='movie_id',\n", "    target='rating',\n", "    similarity_type='pearson',\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2019-06-15T07:04:26.908070Z", "start_time": "2019-06-15T07:04:26.896928Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/plain": [ "Class : ItemSimilarityRecommender\n", "\n", "Schema\n", "------\n", "User ID : user_id\n", "Item ID : movie_id\n", "Target : rating\n", "Additional observation features : 0\n", "User side features : []\n", "Item side features : []\n", "\n", "Statistics\n", "----------\n", "Number of observations : 949852\n", "Number of users : 6040\n", "Number of items : 3701\n", "\n", "Training summary\n", "----------------\n", "Training time : 2.8226\n", "\n", "Model Parameters\n", "----------------\n", "Model class : ItemSimilarityRecommender\n", "threshold : 0.001\n", "similarity_type : pearson\n", "training_method : auto\n", "\n", "Other Settings\n", "--------------\n", "max_data_passes : 4096\n", "max_item_neighborhood_size : 64\n", "nearest_neighbors_interaction_proportion_threshold : 0.05\n", "target_memory_usage : 8589934592\n", "sparse_density_estimation_sample_size : 4096\n", "degree_approximation_threshold : 4096\n", "seed_item_set_size : 50" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m2" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2019-06-15T07:04:50.789017Z", "start_time": "2019-06-15T07:04:47.875428Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "compare_models: using 2811 users to estimate model performance\n", "PROGRESS: Evaluate model M0\n" ] }, { "data": { "text/html": [ "
recommendations finished on 1000/2811 queries. users per second: 10084.7
" ], "text/plain": [ "recommendations finished on 1000/2811 queries. users per second: 10084.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 2000/2811 queries. users per second: 10557.4
" ], "text/plain": [ "recommendations finished on 2000/2811 queries. users per second: 10557.4" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Precision and recall summary statistics by cutoff\n", "+--------+----------------------+----------------------+\n", "| cutoff | mean_recall | mean_precision |\n", "+--------+----------------------+----------------------+\n", "| 1 | 0.004372314245294037 | 0.03344005691924596 |\n", "| 2 | 0.008439255238125647 | 0.030771967271433692 |\n", "| 3 | 0.011792773608123091 | 0.029764022293371297 |\n", "| 4 | 0.014103362205887681 | 0.027303450729277888 |\n", "| 5 | 0.017724646480050326 | 0.026894343649946677 |\n", "| 6 | 0.01985047799128097 | 0.02549507885687179 |\n", "| 7 | 0.023037645809147193 | 0.025054632311836182 |\n", "| 8 | 0.02564717744662357 | 0.024101743151903235 |\n", "| 9 | 0.027494985038662042 | 0.023123443614372085 |\n", "| 10 | 0.02954846065621093 | 0.022483102098897183 |\n", "+--------+----------------------+----------------------+\n", "[10 rows x 3 columns]\n", "\n", "\n", "Overall RMSE: 0.988323739301448\n", "\n", "Per User RMSE (best)\n", "+---------+----------------------+-------+\n", "| user_id | rmse | count |\n", "+---------+----------------------+-------+\n", "| 4695 | 0.008856667044261357 | 1 |\n", "+---------+----------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per User RMSE (worst)\n", "+---------+-------------------+-------+\n", "| user_id | rmse | count |\n", "+---------+-------------------+-------+\n", "| 1102 | 2.957562522855876 | 1 |\n", "+---------+-------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (best)\n", "+----------+----------------------+-------+\n", "| movie_id | rmse | count |\n", "+----------+----------------------+-------+\n", "| 3674 | 0.012974611607248221 | 1 |\n", "+----------+----------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE 
(worst)\n", "+----------+--------------------+-------+\n", "| movie_id | rmse | count |\n", "+----------+--------------------+-------+\n", "| 3886 | 3.4432479133103597 | 1 |\n", "+----------+--------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "PROGRESS: Evaluate model M1\n" ] }, { "data": { "text/html": [ "
recommendations finished on 1000/2811 queries. users per second: 23065.4
" ], "text/plain": [ "recommendations finished on 1000/2811 queries. users per second: 23065.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 2000/2811 queries. users per second: 24766.9
" ], "text/plain": [ "recommendations finished on 2000/2811 queries. users per second: 24766.9" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Precision and recall summary statistics by cutoff\n", "+--------+-------------+----------------+\n", "| cutoff | mean_recall | mean_precision |\n", "+--------+-------------+----------------+\n", "| 1 | 0.0 | 0.0 |\n", "| 2 | 0.0 | 0.0 |\n", "| 3 | 0.0 | 0.0 |\n", "| 4 | 0.0 | 0.0 |\n", "| 5 | 0.0 | 0.0 |\n", "| 6 | 0.0 | 0.0 |\n", "| 7 | 0.0 | 0.0 |\n", "| 8 | 0.0 | 0.0 |\n", "| 9 | 0.0 | 0.0 |\n", "| 10 | 0.0 | 0.0 |\n", "+--------+-------------+----------------+\n", "[10 rows x 3 columns]\n", "\n", "\n", "Overall RMSE: 0.977554609754323\n", "\n", "Per User RMSE (best)\n", "+---------+-----------------------+-------+\n", "| user_id | rmse | count |\n", "+---------+-----------------------+-------+\n", "| 3872 | 4.440892098500626e-16 | 1 |\n", "+---------+-----------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per User RMSE (worst)\n", "+---------+--------------------+-------+\n", "| user_id | rmse | count |\n", "+---------+--------------------+-------+\n", "| 5214 | 3.2845314102161183 | 2 |\n", "+---------+--------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (best)\n", "+----------+------+-------+\n", "| movie_id | rmse | count |\n", "+----------+------+-------+\n", "| 1842 | 0.0 | 1 |\n", "+----------+------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (worst)\n", "+----------+------+-------+\n", "| movie_id | rmse | count |\n", "+----------+------+-------+\n", "| 572 | 4.0 | 1 |\n", "+----------+------+-------+\n", "[1 rows x 3 columns]\n", "\n" ] } ], "source": [ "result = tc.recommender.util.compare_models(test_set, \n", " [m, m2],\n", " user_sample=.5, skip_set=train_set)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### 
Getting similar items" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:24:53.589323Z", "start_time": "2019-06-14T16:24:53.568580Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_idsimilarscorerank
128712620.89355385303497311
128712720.86842399835586552
128726620.86681872606277473
128733660.85481220483779914
128729480.85437524318695075
128730620.84941840171813966
128729470.84326535463333137
128738360.83848327398300178
128713040.83083325624465949
128712500.826753139495849610
\n", "[10 rows x 4 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tmovie_id\tint\n", "\tsimilar\tint\n", "\tscore\tfloat\n", "\trank\tint\n", "\n", "Rows: 10\n", "\n", "Data:\n", "+----------+---------+--------------------+------+\n", "| movie_id | similar | score | rank |\n", "+----------+---------+--------------------+------+\n", "| 1287 | 1262 | 0.8935538530349731 | 1 |\n", "| 1287 | 1272 | 0.8684239983558655 | 2 |\n", "| 1287 | 2662 | 0.8668187260627747 | 3 |\n", "| 1287 | 3366 | 0.8548122048377991 | 4 |\n", "| 1287 | 2948 | 0.8543752431869507 | 5 |\n", "| 1287 | 3062 | 0.8494184017181396 | 6 |\n", "| 1287 | 2947 | 0.8432653546333313 | 7 |\n", "| 1287 | 3836 | 0.8384832739830017 | 8 |\n", "| 1287 | 1304 | 0.8308332562446594 | 9 |\n", "| 1287 | 1250 | 0.8267531394958496 | 10 |\n", "+----------+---------+--------------------+------+\n", "[10 rows x 4 columns]" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.get_similar_items([1287]) # movie_id is Ben-Hur" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:45:15.507691Z", "start_time": "2018-05-05T06:45:15.502035Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Help on method get_similar_items in module graphlab.toolkits.recommender.util:\n", "\n", "get_similar_items(self, items=None, k=10, verbose=False) method of graphlab.toolkits.recommender.ranking_factorization_recommender.RankingFactorizationRecommender instance\n", " Get the k most similar items for each item in items.\n", " \n", " Each type of recommender has its own model for the similarity\n", " between items. 
For example, the item_similarity_recommender will\n", " return the most similar items according to the user-chosen\n", " similarity; the factorization_recommender will return the\n", " nearest items based on the cosine similarity between latent item\n", " factors.\n", " \n", " Parameters\n", " ----------\n", " items : SArray or list; optional\n", " An :class:`~graphlab.SArray` or list of item ids for which to get\n", " similar items. If 'None', then return the `k` most similar items for\n", " all items in the training set.\n", " \n", " k : int, optional\n", " The number of similar items for each item.\n", " \n", " verbose : bool, optional\n", " Progress printing is shown.\n", " \n", " Returns\n", " -------\n", " out : SFrame\n", " A SFrame with the top ranked similar items for each item. The\n", " columns `item`, 'similar', 'score' and 'rank', where\n", " `item` matches the item column name specified at training time.\n", " The 'rank' is between 1 and `k` and 'score' gives the similarity\n", " score of that item. 
The value of the score depends on the method\n", " used for computing item similarities.\n", " \n", " Examples\n", " --------\n", " \n", " >>> sf = graphlab.SFrame({'user_id': [\"0\", \"0\", \"0\", \"1\", \"1\", \"2\", \"2\", \"2\"],\n", " 'item_id': [\"a\", \"b\", \"c\", \"a\", \"b\", \"b\", \"c\", \"d\"]})\n", " >>> m = graphlab.item_similarity_recommender.create(sf)\n", " >>> nn = m.get_similar_items()\n", "\n" ] } ], "source": [ "help(m.get_similar_items)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "'score' gives the similarity score of that item" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:25:06.875078Z", "start_time": "2019-06-14T16:25:06.872421Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "# m.get_similar_items([1287]).join(items, on={'similar': 'movie_id'}).sort('rank')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Making recommendations" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:25:10.176791Z", "start_time": "2019-06-14T16:25:09.614271Z" }, "slideshow": { "slide_type": "fragment" } }, "outputs": [ { "data": { "text/html": [ "
recommendations finished on 1000/6040 queries. users per second: 11685.2
" ], "text/plain": [ "recommendations finished on 1000/6040 queries. users per second: 11685.2" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 2000/6040 queries. users per second: 11654.4
" ], "text/plain": [ "recommendations finished on 2000/6040 queries. users per second: 11654.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 3000/6040 queries. users per second: 11658.6
" ], "text/plain": [ "recommendations finished on 3000/6040 queries. users per second: 11658.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 4000/6040 queries. users per second: 11321.5
" ], "text/plain": [ "recommendations finished on 4000/6040 queries. users per second: 11321.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 5000/6040 queries. users per second: 11502.9
" ], "text/plain": [ "recommendations finished on 5000/6040 queries. users per second: 11502.9" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 6000/6040 queries. users per second: 11105.9
" ], "text/plain": [ "recommendations finished on 6000/6040 queries. users per second: 11105.9" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "recs = m.recommend()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:25:12.909100Z", "start_time": "2019-06-14T16:25:12.888333Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idmovie_idscorerank
13185.0456226866637931
111984.8624240558540092
1504.766254748026063
15934.7661075171028844
18584.7477951522862185
111964.6893158320283166
128584.6789702538346527
123964.59866199157588358
11104.5883084710633039
125714.57340863607280110
\n", "[60400 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tuser_id\tint\n", "\tmovie_id\tint\n", "\tscore\tfloat\n", "\trank\tint\n", "\n", "Rows: 60400\n", "\n", "Data:\n", "+---------+----------+--------------------+------+\n", "| user_id | movie_id | score | rank |\n", "+---------+----------+--------------------+------+\n", "| 1 | 318 | 5.045622686663793 | 1 |\n", "| 1 | 1198 | 4.862424055854009 | 2 |\n", "| 1 | 50 | 4.76625474802606 | 3 |\n", "| 1 | 593 | 4.766107517102884 | 4 |\n", "| 1 | 858 | 4.747795152286218 | 5 |\n", "| 1 | 1196 | 4.689315832028316 | 6 |\n", "| 1 | 2858 | 4.678970253834652 | 7 |\n", "| 1 | 2396 | 4.5986619915758835 | 8 |\n", "| 1 | 110 | 4.588308471063303 | 9 |\n", "| 1 | 2571 | 4.573408636072801 | 10 |\n", "+---------+----------+--------------------+------+\n", "[60400 rows x 4 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recs" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:25:17.489601Z", "start_time": "2019-06-14T16:25:17.232492Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idmovie_idratingtimestamp
434685978294008
412103978293924
429514978294282
412144978294260
410364978294282
42605978294199
420285978294230
44804978294008
411962978294199
411985978294199
\n", "[? rows x 4 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.\n", "
" ], "text/plain": [ "Columns:\n", "\tuser_id\tint\n", "\tmovie_id\tint\n", "\trating\tint\n", "\ttimestamp\tint\n", "\n", "Rows: Unknown\n", "\n", "Data:\n", "+---------+----------+--------+-----------+\n", "| user_id | movie_id | rating | timestamp |\n", "+---------+----------+--------+-----------+\n", "| 4 | 3468 | 5 | 978294008 |\n", "| 4 | 1210 | 3 | 978293924 |\n", "| 4 | 2951 | 4 | 978294282 |\n", "| 4 | 1214 | 4 | 978294260 |\n", "| 4 | 1036 | 4 | 978294282 |\n", "| 4 | 260 | 5 | 978294199 |\n", "| 4 | 2028 | 5 | 978294230 |\n", "| 4 | 480 | 4 | 978294008 |\n", "| 4 | 1196 | 2 | 978294199 |\n", "| 4 | 1198 | 5 | 978294199 |\n", "+---------+----------+--------+-----------+\n", "[? rows x 4 columns]\n", "Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.\n", "You can use sf.materialize() to force materialization." ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[data['user_id'] == 4]" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:25:22.784880Z", "start_time": "2019-06-14T16:25:22.782529Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "# m.recommend(users=[4], k=20).join(items, on='movie_id')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Recommendations for new users" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:25:34.317252Z", "start_time": "2019-06-14T16:25:34.296992Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
movie_iduser_idrating
30999992
1000999991
900999993
883999994
251999990
200999990
199999991
180999991
120999991
991999992
\n", "[11 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tmovie_id\tint\n", "\tuser_id\tint\n", "\trating\tint\n", "\n", "Rows: 11\n", "\n", "Data:\n", "+----------+---------+--------+\n", "| movie_id | user_id | rating |\n", "+----------+---------+--------+\n", "| 30 | 99999 | 2 |\n", "| 1000 | 99999 | 1 |\n", "| 900 | 99999 | 3 |\n", "| 883 | 99999 | 4 |\n", "| 251 | 99999 | 0 |\n", "| 200 | 99999 | 0 |\n", "| 199 | 99999 | 1 |\n", "| 180 | 99999 | 1 |\n", "| 120 | 99999 | 1 |\n", "| 991 | 99999 | 2 |\n", "+----------+---------+--------+\n", "[11 rows x 3 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recent_data = tc.SFrame()\n", "recent_data['movie_id'] = [30, 1000, 900, 883, 251, 200, 199, 180, 120, 991, 1212] \n", "recent_data['user_id'] = 99999\n", "recent_data['rating'] = [2, 1, 3, 4, 0, 0, 1, 1, 1, 2, 3]\n", "recent_data" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:25:45.789843Z", "start_time": "2019-06-14T16:25:45.767211Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idmovie_idscorerank
9999938815.01
9999936075.02
9999918305.03
999999895.04
9999931725.05
9999932335.06
999997875.07
9999933825.08
9999936565.09
9999932805.010
\n", "[10 rows x 4 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tuser_id\tint\n", "\tmovie_id\tint\n", "\tscore\tfloat\n", "\trank\tint\n", "\n", "Rows: 10\n", "\n", "Data:\n", "+---------+----------+-------+------+\n", "| user_id | movie_id | score | rank |\n", "+---------+----------+-------+------+\n", "| 99999 | 3881 | 5.0 | 1 |\n", "| 99999 | 3607 | 5.0 | 2 |\n", "| 99999 | 1830 | 5.0 | 3 |\n", "| 99999 | 989 | 5.0 | 4 |\n", "| 99999 | 3172 | 5.0 | 5 |\n", "| 99999 | 3233 | 5.0 | 6 |\n", "| 99999 | 787 | 5.0 | 7 |\n", "| 99999 | 3382 | 5.0 | 8 |\n", "| 99999 | 3656 | 5.0 | 9 |\n", "| 99999 | 3280 | 5.0 | 10 |\n", "+---------+----------+-------+------+\n", "[10 rows x 4 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m2.recommend(users=[99999], new_observation_data=recent_data)#.join(items, on='movie_id').sort('rank')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Saving and loading models" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "m.save('my_model')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "m_again = graphlab.load_model('my_model')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "m_again" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Python [conda env:anaconda]", "language": "python", "name": "conda-env-anaconda-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" }, "latex_envs": { 
"LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 0, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": false, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "475px", "left": "920px", "top": "140.384px", "width": "230px" }, "toc_section_display": false, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 1 }