{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"***\n",
"***\n",
"# 使用Turicreate进行电影推荐\n",
"***\n",
"***\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-15T07:01:17.612752Z",
"start_time": "2019-06-15T07:01:16.946694Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import turicreate as tc\n",
"# Optional: uncomment the two lines below to render matplotlib figures inline in the notebook\n",
"# import matplotlib.pyplot as plt\n",
"# %matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"# Download the MovieLens 1M dataset from http://files.grouplens.org/datasets/movielens/ml-1m.zip and unzip it before running the cells below"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-15T07:01:45.368349Z",
"start_time": "2019-06-15T07:01:35.370047Z"
},
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"text/html": [
"
Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/ratings.dat
"
],
"text/plain": [
"Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/ratings.dat"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Parsing completed. Parsed 100 lines in 0.281192 secs.
"
],
"text/plain": [
"Parsing completed. Parsed 100 lines in 0.281192 secs."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------------------\n",
"Inferred types from first 100 line(s) of file as \n",
"column_type_hints=[str]\n",
"If parsing fails due to incorrect types, you can correct\n",
"the inferred type list above and pass it to read_csv in\n",
"the column_type_hints argument\n",
"------------------------------------------------------\n"
]
},
{
"data": {
"text/html": [
"Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/ratings.dat
"
],
"text/plain": [
"Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/ratings.dat"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Parsing completed. Parsed 1000209 lines in 0.372092 secs.
"
],
"text/plain": [
"Parsing completed. Parsed 1000209 lines in 0.372092 secs."
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Directory holding the unzipped MovieLens 1M files; set ML1M_DIR to override the default location.\n",
"ml1m_dir = os.environ.get('ML1M_DIR', '/Users/datalab/bigdata/cjc/ml-1m')\n",
"# Read every line of ratings.dat into the single string column X1.\n",
"rating_lines = tc.SFrame.read_csv(os.path.join(ml1m_dir, 'ratings.dat'), delimiter='\\n',\n",
"                                  header=False)['X1']\n",
"# Split each line on '::' and unpack the pieces into columns X.0 .. X.3.\n",
"data = rating_lines.apply(lambda line: line.split('::')).unpack()\n",
"# Every unpacked field is cast from string to int.\n",
"for col in data.column_names():\n",
"    data[col] = data[col].astype(int)\n",
"data = data.rename({'X.0': 'user_id', 'X.1': 'movie_id', 'X.2': 'rating', 'X.3': 'timestamp'})\n",
"#data.save('ratings')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-15T07:02:14.283554Z",
"start_time": "2019-06-15T07:02:14.143619Z"
},
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"text/html": [
"Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/users.dat
"
],
"text/plain": [
"Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/users.dat"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Parsing completed. Parsed 100 lines in 0.028041 secs.
"
],
"text/plain": [
"Parsing completed. Parsed 100 lines in 0.028041 secs."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"------------------------------------------------------\n",
"Inferred types from first 100 line(s) of file as \n",
"column_type_hints=[str]\n",
"If parsing fails due to incorrect types, you can correct\n",
"the inferred type list above and pass it to read_csv in\n",
"the column_type_hints argument\n",
"------------------------------------------------------\n"
]
},
{
"data": {
"text/html": [
"Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/users.dat
"
],
"text/plain": [
"Finished parsing file /Users/datalab/bigdata/cjc/ml-1m/users.dat"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Parsing completed. Parsed 6040 lines in 0.007235 secs.
"
],
"text/plain": [
"Parsing completed. Parsed 6040 lines in 0.007235 secs."
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Directory holding the unzipped MovieLens 1M files; set ML1M_DIR to override the default location.\n",
"ml1m_dir = os.environ.get('ML1M_DIR', '/Users/datalab/bigdata/cjc/ml-1m')\n",
"# Read every line of users.dat into the single string column X1.\n",
"user_lines = tc.SFrame.read_csv(os.path.join(ml1m_dir, 'users.dat'), delimiter='\\n',\n",
"                                header=False)['X1']\n",
"# Split each line on '::' and unpack the pieces into columns X.0 .. X.4.\n",
"users = user_lines.apply(lambda line: line.split('::')).unpack()\n",
"users = users.rename({'X.0': 'user_id', 'X.1': 'gender', 'X.2': 'age', 'X.3': 'occupation', 'X.4': 'zip-code'})\n",
"# Only user_id is cast to int; the remaining columns stay as strings.\n",
"users['user_id'] = users['user_id'].astype(int)\n",
"# Persist the SFrame under the relative path 'users'.\n",
"users.save('users')\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:22:48.875374Z",
"start_time": "2019-06-14T16:22:48.872581Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"#items = tc.SFrame.read_csv('/Users/datalab/bigdata/ml-1m/movies.dat', delimiter='\\n', header=False)#['X1'].apply(lambda x: x.split('::')).unpack()\n",
"# items = items.rename({'X.0': 'movie_id', 'X.1': 'title', 'X.2': 'genre'})\n",
"# items['movie_id'] = items['movie_id'].astype(int)\n",
"# items.save('items')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-15T07:02:25.813647Z",
"start_time": "2019-06-15T07:02:25.785898Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" user_id | \n",
" movie_id | \n",
" rating | \n",
" timestamp | \n",
"
\n",
" \n",
" 1 | \n",
" 1193 | \n",
" 5 | \n",
" 978300760 | \n",
"
\n",
" \n",
" 1 | \n",
" 661 | \n",
" 3 | \n",
" 978302109 | \n",
"
\n",
" \n",
" 1 | \n",
" 914 | \n",
" 3 | \n",
" 978301968 | \n",
"
\n",
" \n",
" 1 | \n",
" 3408 | \n",
" 4 | \n",
" 978300275 | \n",
"
\n",
" \n",
" 1 | \n",
" 2355 | \n",
" 5 | \n",
" 978824291 | \n",
"
\n",
" \n",
" 1 | \n",
" 1197 | \n",
" 3 | \n",
" 978302268 | \n",
"
\n",
" \n",
" 1 | \n",
" 1287 | \n",
" 5 | \n",
" 978302039 | \n",
"
\n",
" \n",
" 1 | \n",
" 2804 | \n",
" 5 | \n",
" 978300719 | \n",
"
\n",
" \n",
" 1 | \n",
" 594 | \n",
" 4 | \n",
" 978302268 | \n",
"
\n",
" \n",
" 1 | \n",
" 919 | \n",
" 4 | \n",
" 978301368 | \n",
"
\n",
"
\n",
"[1000209 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n",
"
"
],
"text/plain": [
"Columns:\n",
"\tuser_id\tint\n",
"\tmovie_id\tint\n",
"\trating\tint\n",
"\ttimestamp\tint\n",
"\n",
"Rows: 1000209\n",
"\n",
"Data:\n",
"+---------+----------+--------+-----------+\n",
"| user_id | movie_id | rating | timestamp |\n",
"+---------+----------+--------+-----------+\n",
"| 1 | 1193 | 5 | 978300760 |\n",
"| 1 | 661 | 3 | 978302109 |\n",
"| 1 | 914 | 3 | 978301968 |\n",
"| 1 | 3408 | 4 | 978300275 |\n",
"| 1 | 2355 | 5 | 978824291 |\n",
"| 1 | 1197 | 3 | 978302268 |\n",
"| 1 | 1287 | 5 | 978302039 |\n",
"| 1 | 2804 | 5 | 978300719 |\n",
"| 1 | 594 | 4 | 978302268 |\n",
"| 1 | 919 | 4 | 978301368 |\n",
"+---------+----------+--------+-----------+\n",
"[1000209 rows x 4 columns]\n",
"Note: Only the head of the SFrame is printed.\n",
"You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns."
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:22:57.906209Z",
"start_time": "2019-06-14T16:22:57.903728Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"#items"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-15T07:02:48.315958Z",
"start_time": "2019-06-15T07:02:48.294741Z"
},
"slideshow": {
"slide_type": "subslide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" user_id | \n",
" gender | \n",
" age | \n",
" occupation | \n",
" zip-code | \n",
"
\n",
" \n",
" 1 | \n",
" F | \n",
" 1 | \n",
" 10 | \n",
" 48067 | \n",
"
\n",
" \n",
" 2 | \n",
" M | \n",
" 56 | \n",
" 16 | \n",
" 70072 | \n",
"
\n",
" \n",
" 3 | \n",
" M | \n",
" 25 | \n",
" 15 | \n",
" 55117 | \n",
"
\n",
" \n",
" 4 | \n",
" M | \n",
" 45 | \n",
" 7 | \n",
" 02460 | \n",
"
\n",
" \n",
" 5 | \n",
" M | \n",
" 25 | \n",
" 20 | \n",
" 55455 | \n",
"
\n",
" \n",
" 6 | \n",
" F | \n",
" 50 | \n",
" 9 | \n",
" 55117 | \n",
"
\n",
" \n",
" 7 | \n",
" M | \n",
" 35 | \n",
" 1 | \n",
" 06810 | \n",
"
\n",
" \n",
" 8 | \n",
" M | \n",
" 25 | \n",
" 12 | \n",
" 11413 | \n",
"
\n",
" \n",
" 9 | \n",
" M | \n",
" 25 | \n",
" 17 | \n",
" 61614 | \n",
"
\n",
" \n",
" 10 | \n",
" F | \n",
" 35 | \n",
" 1 | \n",
" 95370 | \n",
"
\n",
"
\n",
"[6040 rows x 5 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n",
"
"
],
"text/plain": [
"Columns:\n",
"\tuser_id\tint\n",
"\tgender\tstr\n",
"\tage\tstr\n",
"\toccupation\tstr\n",
"\tzip-code\tstr\n",
"\n",
"Rows: 6040\n",
"\n",
"Data:\n",
"+---------+--------+-----+------------+----------+\n",
"| user_id | gender | age | occupation | zip-code |\n",
"+---------+--------+-----+------------+----------+\n",
"| 1 | F | 1 | 10 | 48067 |\n",
"| 2 | M | 56 | 16 | 70072 |\n",
"| 3 | M | 25 | 15 | 55117 |\n",
"| 4 | M | 45 | 7 | 02460 |\n",
"| 5 | M | 25 | 20 | 55455 |\n",
"| 6 | F | 50 | 9 | 55117 |\n",
"| 7 | M | 35 | 1 | 06810 |\n",
"| 8 | M | 25 | 12 | 11413 |\n",
"| 9 | M | 25 | 17 | 61614 |\n",
"| 10 | F | 35 | 1 | 95370 |\n",
"+---------+--------+-----+------------+----------+\n",
"[6040 rows x 5 columns]\n",
"Note: Only the head of the SFrame is printed.\n",
"You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns."
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"users"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2018-05-05T06:33:14.874154Z",
"start_time": "2018-05-05T06:33:13.913325Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"#data = data.join(items, on='movie_id')"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:23:16.893160Z",
"start_time": "2019-06-14T16:23:16.890595Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"#data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-15T07:03:02.788476Z",
"start_time": "2019-06-15T07:03:02.784988Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"# Hold out roughly 5% of the ratings for evaluation; the fixed seed keeps the split repeatable.\n",
"train_set, test_set = data.random_split(fraction=0.95, seed=1)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-15T07:03:33.095594Z",
"start_time": "2019-06-15T07:03:12.909391Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"Preparing data set.
"
],
"text/plain": [
"Preparing data set."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Data has 949852 observations with 6040 users and 3701 items.
"
],
"text/plain": [
" Data has 949852 observations with 6040 users and 3701 items."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Data prepared in: 0.550091s
"
],
"text/plain": [
" Data prepared in: 0.550091s"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Training ranking_factorization_recommender for recommendations.
"
],
"text/plain": [
"Training ranking_factorization_recommender for recommendations."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+--------------------------------+--------------------------------------------------+----------+
"
],
"text/plain": [
"+--------------------------------+--------------------------------------------------+----------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| Parameter | Description | Value |
"
],
"text/plain": [
"| Parameter | Description | Value |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+--------------------------------+--------------------------------------------------+----------+
"
],
"text/plain": [
"+--------------------------------+--------------------------------------------------+----------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| num_factors | Factor Dimension | 32 |
"
],
"text/plain": [
"| num_factors | Factor Dimension | 32 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| regularization | L2 Regularization on Factors | 1e-09 |
"
],
"text/plain": [
"| regularization | L2 Regularization on Factors | 1e-09 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| solver | Solver used for training | adagrad |
"
],
"text/plain": [
"| solver | Solver used for training | adagrad |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| linear_regularization | L2 Regularization on Linear Coefficients | 1e-09 |
"
],
"text/plain": [
"| linear_regularization | L2 Regularization on Linear Coefficients | 1e-09 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| ranking_regularization | Rank-based Regularization Weight | 0.25 |
"
],
"text/plain": [
"| ranking_regularization | Rank-based Regularization Weight | 0.25 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| max_iterations | Maximum Number of Iterations | 25 |
"
],
"text/plain": [
"| max_iterations | Maximum Number of Iterations | 25 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+--------------------------------+--------------------------------------------------+----------+
"
],
"text/plain": [
"+--------------------------------+--------------------------------------------------+----------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Optimizing model using SGD; tuning step size.
"
],
"text/plain": [
" Optimizing model using SGD; tuning step size."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Using 118731 / 949852 points for tuning the step size.
"
],
"text/plain": [
" Using 118731 / 949852 points for tuning the step size."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+---------+-------------------+------------------------------------------+
"
],
"text/plain": [
"+---------+-------------------+------------------------------------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| Attempt | Initial Step Size | Estimated Objective Value |
"
],
"text/plain": [
"| Attempt | Initial Step Size | Estimated Objective Value |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+---------+-------------------+------------------------------------------+
"
],
"text/plain": [
"+---------+-------------------+------------------------------------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 0 | 16.6667 | Not Viable |
"
],
"text/plain": [
"| 0 | 16.6667 | Not Viable |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 1 | 4.16667 | Not Viable |
"
],
"text/plain": [
"| 1 | 4.16667 | Not Viable |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 2 | 1.04167 | Not Viable |
"
],
"text/plain": [
"| 2 | 1.04167 | Not Viable |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 3 | 0.260417 | Not Viable |
"
],
"text/plain": [
"| 3 | 0.260417 | Not Viable |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 4 | 0.0651042 | 1.8722 |
"
],
"text/plain": [
"| 4 | 0.0651042 | 1.8722 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 5 | 0.0325521 | 1.94425 |
"
],
"text/plain": [
"| 5 | 0.0325521 | 1.94425 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 6 | 0.016276 | 1.95877 |
"
],
"text/plain": [
"| 6 | 0.016276 | 1.95877 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 7 | 0.00813802 | 2.0441 |
"
],
"text/plain": [
"| 7 | 0.00813802 | 2.0441 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+---------+-------------------+------------------------------------------+
"
],
"text/plain": [
"+---------+-------------------+------------------------------------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| Final | 0.0651042 | 1.8722 |
"
],
"text/plain": [
"| Final | 0.0651042 | 1.8722 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+---------+-------------------+------------------------------------------+
"
],
"text/plain": [
"+---------+-------------------+------------------------------------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Starting Optimization.
"
],
"text/plain": [
"Starting Optimization."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+---------+--------------+-------------------+-----------------------+-------------+
"
],
"text/plain": [
"+---------+--------------+-------------------+-----------------------+-------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| Iter. | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size |
"
],
"text/plain": [
"| Iter. | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+---------+--------------+-------------------+-----------------------+-------------+
"
],
"text/plain": [
"+---------+--------------+-------------------+-----------------------+-------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| Initial | 110us | 2.44718 | 1.1172 | |
"
],
"text/plain": [
"| Initial | 110us | 2.44718 | 1.1172 | |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+---------+--------------+-------------------+-----------------------+-------------+
"
],
"text/plain": [
"+---------+--------------+-------------------+-----------------------+-------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 1 | 536.251ms | 2.09737 | 1.13925 | 0.0651042 |
"
],
"text/plain": [
"| 1 | 536.251ms | 2.09737 | 1.13925 | 0.0651042 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 2 | 1.05s | 1.85594 | 1.06079 | 0.0651042 |
"
],
"text/plain": [
"| 2 | 1.05s | 1.85594 | 1.06079 | 0.0651042 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 3 | 1.55s | 1.79883 | 1.03161 | 0.0651042 |
"
],
"text/plain": [
"| 3 | 1.55s | 1.79883 | 1.03161 | 0.0651042 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 4 | 2.06s | 1.77231 | 1.02676 | 0.0651042 |
"
],
"text/plain": [
"| 4 | 2.06s | 1.77231 | 1.02676 | 0.0651042 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 5 | 2.57s | 1.75455 | 1.02264 | 0.0651042 |
"
],
"text/plain": [
"| 5 | 2.57s | 1.75455 | 1.02264 | 0.0651042 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 10 | 5.81s | 1.66968 | 0.995516 | 0.0651042 |
"
],
"text/plain": [
"| 10 | 5.81s | 1.66968 | 0.995516 | 0.0651042 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 20 | 12.34s | 1.58039 | 0.969493 | 0.0651042 |
"
],
"text/plain": [
"| 20 | 12.34s | 1.58039 | 0.969493 | 0.0651042 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 25 | 15.69s | 1.54869 | 0.961055 | 0.0651042 |
"
],
"text/plain": [
"| 25 | 15.69s | 1.54869 | 0.961055 | 0.0651042 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+---------+--------------+-------------------+-----------------------+-------------+
"
],
"text/plain": [
"+---------+--------------+-------------------+-----------------------+-------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Optimization Complete: Maximum number of passes through the data reached.
"
],
"text/plain": [
"Optimization Complete: Maximum number of passes through the data reached."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Computing final objective value and training RMSE.
"
],
"text/plain": [
"Computing final objective value and training RMSE."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Final objective value: 1.57752
"
],
"text/plain": [
" Final objective value: 1.57752"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Final training RMSE: 0.95536
"
],
"text/plain": [
" Final training RMSE: 0.95536"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Let tc.recommender.create choose a model for the training split.\n",
"# NOTE: train_set still carries the timestamp column; the saved model summary\n",
"# reports it as one additional observation feature.\n",
"m = tc.recommender.create(\n",
"    train_set,\n",
"    user_id='user_id',\n",
"    item_id='movie_id',\n",
"    target='rating',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:23:51.502615Z",
"start_time": "2019-06-14T16:23:51.482326Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Class : RankingFactorizationRecommender\n",
"\n",
"Schema\n",
"------\n",
"User ID : user_id\n",
"Item ID : movie_id\n",
"Target : rating\n",
"Additional observation features : 1\n",
"User side features : []\n",
"Item side features : []\n",
"\n",
"Statistics\n",
"----------\n",
"Number of observations : 949852\n",
"Number of users : 6040\n",
"Number of items : 3701\n",
"\n",
"Training summary\n",
"----------------\n",
"Training time : 21.9973\n",
"\n",
"Model Parameters\n",
"----------------\n",
"Model class : RankingFactorizationRecommender\n",
"num_factors : 32\n",
"binary_target : 0\n",
"side_data_factorization : 1\n",
"solver : auto\n",
"nmf : 0\n",
"max_iterations : 25\n",
"\n",
"Regularization Settings\n",
"-----------------------\n",
"regularization : 0.0\n",
"regularization_type : normal\n",
"linear_regularization : 0.0\n",
"ranking_regularization : 0.25\n",
"unobserved_rating_value : -1.7976931348623157e+308\n",
"num_sampled_negative_examples : 4\n",
"ials_confidence_scaling_type : auto\n",
"ials_confidence_scaling_factor : 1\n",
"\n",
"Optimization Settings\n",
"---------------------\n",
"init_random_sigma : 0.01\n",
"sgd_convergence_interval : 4\n",
"sgd_convergence_threshold : 0.0\n",
"sgd_max_trial_iterations : 5\n",
"sgd_sampling_block_size : 131072\n",
"sgd_step_adjustment_interval : 4\n",
"sgd_step_size : 0.0\n",
"sgd_trial_sample_minimum_size : 10000\n",
"sgd_trial_sample_proportion : 0.125\n",
"step_size_decrease_rate : 0.75\n",
"additional_iterations_if_unhealthy : 5\n",
"adagrad_momentum_weighting : 0.9\n",
"num_tempering_iterations : 4\n",
"tempering_regularization_start_value : 0.0\n",
"track_exact_loss : 0"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-15T07:04:21.017511Z",
"start_time": "2019-06-15T07:04:17.764265Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"Warning: Ignoring columns timestamp;
"
],
"text/plain": [
"Warning: Ignoring columns timestamp;"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" To use these columns in scoring predictions, use a model that allows the use of additional features.
"
],
"text/plain": [
" To use these columns in scoring predictions, use a model that allows the use of additional features."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Preparing data set.
"
],
"text/plain": [
"Preparing data set."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Data has 949852 observations with 6040 users and 3701 items.
"
],
"text/plain": [
" Data has 949852 observations with 6040 users and 3701 items."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Data prepared in: 0.426101s
"
],
"text/plain": [
" Data prepared in: 0.426101s"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Training model from provided data.
"
],
"text/plain": [
"Training model from provided data."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Gathering per-item and per-user statistics.
"
],
"text/plain": [
"Gathering per-item and per-user statistics."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+--------------------------------+------------+
"
],
"text/plain": [
"+--------------------------------+------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| Elapsed Time (Item Statistics) | % Complete |
"
],
"text/plain": [
"| Elapsed Time (Item Statistics) | % Complete |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+--------------------------------+------------+
"
],
"text/plain": [
"+--------------------------------+------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 27.234ms | 16.5 |
"
],
"text/plain": [
"| 27.234ms | 16.5 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 42.954ms | 100 |
"
],
"text/plain": [
"| 42.954ms | 100 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+--------------------------------+------------+
"
],
"text/plain": [
"+--------------------------------+------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Setting up lookup tables.
"
],
"text/plain": [
"Setting up lookup tables."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Processing data in one pass using dense lookup tables.
"
],
"text/plain": [
"Processing data in one pass using dense lookup tables."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+-------------------------------------+------------------+-----------------+
"
],
"text/plain": [
"+-------------------------------------+------------------+-----------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |
"
],
"text/plain": [
"| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+-------------------------------------+------------------+-----------------+
"
],
"text/plain": [
"+-------------------------------------+------------------+-----------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 73.627ms | 0 | 2 |
"
],
"text/plain": [
"| 73.627ms | 0 | 2 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"| 2.79s | 100 | 3701 |
"
],
"text/plain": [
"| 2.79s | 100 | 3701 |"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"+-------------------------------------+------------------+-----------------+
"
],
"text/plain": [
"+-------------------------------------+------------------+-----------------+"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Finalizing lookup tables.
"
],
"text/plain": [
"Finalizing lookup tables."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Generating candidate set for working with new users.
"
],
"text/plain": [
"Generating candidate set for working with new users."
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Finished training in 2.82252s
"
],
"text/plain": [
"Finished training in 2.82252s"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Item-similarity recommender trained on the same split, using Pearson similarity.\n",
"m2 = tc.item_similarity_recommender.create(\n",
"    train_set,\n",
"    user_id='user_id',\n",
"    item_id='movie_id',\n",
"    target='rating',\n",
"    similarity_type='pearson',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-15T07:04:26.908070Z",
"start_time": "2019-06-15T07:04:26.896928Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Class : ItemSimilarityRecommender\n",
"\n",
"Schema\n",
"------\n",
"User ID : user_id\n",
"Item ID : movie_id\n",
"Target : rating\n",
"Additional observation features : 0\n",
"User side features : []\n",
"Item side features : []\n",
"\n",
"Statistics\n",
"----------\n",
"Number of observations : 949852\n",
"Number of users : 6040\n",
"Number of items : 3701\n",
"\n",
"Training summary\n",
"----------------\n",
"Training time : 2.8226\n",
"\n",
"Model Parameters\n",
"----------------\n",
"Model class : ItemSimilarityRecommender\n",
"threshold : 0.001\n",
"similarity_type : pearson\n",
"training_method : auto\n",
"\n",
"Other Settings\n",
"--------------\n",
"max_data_passes : 4096\n",
"max_item_neighborhood_size : 64\n",
"nearest_neighbors_interaction_proportion_threshold : 0.05\n",
"target_memory_usage : 8589934592\n",
"sparse_density_estimation_sample_size : 4096\n",
"degree_approximation_threshold : 4096\n",
"seed_item_set_size : 50"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m2"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-15T07:04:50.789017Z",
"start_time": "2019-06-15T07:04:47.875428Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"compare_models: using 2811 users to estimate model performance\n",
"PROGRESS: Evaluate model M0\n"
]
},
{
"data": {
"text/html": [
"recommendations finished on 1000/2811 queries. users per second: 10084.7
"
],
"text/plain": [
"recommendations finished on 1000/2811 queries. users per second: 10084.7"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"recommendations finished on 2000/2811 queries. users per second: 10557.4
"
],
"text/plain": [
"recommendations finished on 2000/2811 queries. users per second: 10557.4"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Precision and recall summary statistics by cutoff\n",
"+--------+----------------------+----------------------+\n",
"| cutoff | mean_recall | mean_precision |\n",
"+--------+----------------------+----------------------+\n",
"| 1 | 0.004372314245294037 | 0.03344005691924596 |\n",
"| 2 | 0.008439255238125647 | 0.030771967271433692 |\n",
"| 3 | 0.011792773608123091 | 0.029764022293371297 |\n",
"| 4 | 0.014103362205887681 | 0.027303450729277888 |\n",
"| 5 | 0.017724646480050326 | 0.026894343649946677 |\n",
"| 6 | 0.01985047799128097 | 0.02549507885687179 |\n",
"| 7 | 0.023037645809147193 | 0.025054632311836182 |\n",
"| 8 | 0.02564717744662357 | 0.024101743151903235 |\n",
"| 9 | 0.027494985038662042 | 0.023123443614372085 |\n",
"| 10 | 0.02954846065621093 | 0.022483102098897183 |\n",
"+--------+----------------------+----------------------+\n",
"[10 rows x 3 columns]\n",
"\n",
"\n",
"Overall RMSE: 0.988323739301448\n",
"\n",
"Per User RMSE (best)\n",
"+---------+----------------------+-------+\n",
"| user_id | rmse | count |\n",
"+---------+----------------------+-------+\n",
"| 4695 | 0.008856667044261357 | 1 |\n",
"+---------+----------------------+-------+\n",
"[1 rows x 3 columns]\n",
"\n",
"\n",
"Per User RMSE (worst)\n",
"+---------+-------------------+-------+\n",
"| user_id | rmse | count |\n",
"+---------+-------------------+-------+\n",
"| 1102 | 2.957562522855876 | 1 |\n",
"+---------+-------------------+-------+\n",
"[1 rows x 3 columns]\n",
"\n",
"\n",
"Per Item RMSE (best)\n",
"+----------+----------------------+-------+\n",
"| movie_id | rmse | count |\n",
"+----------+----------------------+-------+\n",
"| 3674 | 0.012974611607248221 | 1 |\n",
"+----------+----------------------+-------+\n",
"[1 rows x 3 columns]\n",
"\n",
"\n",
"Per Item RMSE (worst)\n",
"+----------+--------------------+-------+\n",
"| movie_id | rmse | count |\n",
"+----------+--------------------+-------+\n",
"| 3886 | 3.4432479133103597 | 1 |\n",
"+----------+--------------------+-------+\n",
"[1 rows x 3 columns]\n",
"\n",
"PROGRESS: Evaluate model M1\n"
]
},
{
"data": {
"text/html": [
"recommendations finished on 1000/2811 queries. users per second: 23065.4
"
],
"text/plain": [
"recommendations finished on 1000/2811 queries. users per second: 23065.4"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"recommendations finished on 2000/2811 queries. users per second: 24766.9
"
],
"text/plain": [
"recommendations finished on 2000/2811 queries. users per second: 24766.9"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Precision and recall summary statistics by cutoff\n",
"+--------+-------------+----------------+\n",
"| cutoff | mean_recall | mean_precision |\n",
"+--------+-------------+----------------+\n",
"| 1 | 0.0 | 0.0 |\n",
"| 2 | 0.0 | 0.0 |\n",
"| 3 | 0.0 | 0.0 |\n",
"| 4 | 0.0 | 0.0 |\n",
"| 5 | 0.0 | 0.0 |\n",
"| 6 | 0.0 | 0.0 |\n",
"| 7 | 0.0 | 0.0 |\n",
"| 8 | 0.0 | 0.0 |\n",
"| 9 | 0.0 | 0.0 |\n",
"| 10 | 0.0 | 0.0 |\n",
"+--------+-------------+----------------+\n",
"[10 rows x 3 columns]\n",
"\n",
"\n",
"Overall RMSE: 0.977554609754323\n",
"\n",
"Per User RMSE (best)\n",
"+---------+-----------------------+-------+\n",
"| user_id | rmse | count |\n",
"+---------+-----------------------+-------+\n",
"| 3872 | 4.440892098500626e-16 | 1 |\n",
"+---------+-----------------------+-------+\n",
"[1 rows x 3 columns]\n",
"\n",
"\n",
"Per User RMSE (worst)\n",
"+---------+--------------------+-------+\n",
"| user_id | rmse | count |\n",
"+---------+--------------------+-------+\n",
"| 5214 | 3.2845314102161183 | 2 |\n",
"+---------+--------------------+-------+\n",
"[1 rows x 3 columns]\n",
"\n",
"\n",
"Per Item RMSE (best)\n",
"+----------+------+-------+\n",
"| movie_id | rmse | count |\n",
"+----------+------+-------+\n",
"| 1842 | 0.0 | 1 |\n",
"+----------+------+-------+\n",
"[1 rows x 3 columns]\n",
"\n",
"\n",
"Per Item RMSE (worst)\n",
"+----------+------+-------+\n",
"| movie_id | rmse | count |\n",
"+----------+------+-------+\n",
"| 572 | 4.0 | 1 |\n",
"+----------+------+-------+\n",
"[1 rows x 3 columns]\n",
"\n"
]
}
],
"source": [
"result = tc.recommender.util.compare_models(test_set, \n",
" [m, m2],\n",
" user_sample=.5, skip_set=train_set)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Getting similar items"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:24:53.589323Z",
"start_time": "2019-06-14T16:24:53.568580Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" movie_id | \n",
" similar | \n",
" score | \n",
" rank | \n",
"
\n",
" \n",
" 1287 | \n",
" 1262 | \n",
" 0.8935538530349731 | \n",
" 1 | \n",
"
\n",
" \n",
" 1287 | \n",
" 1272 | \n",
" 0.8684239983558655 | \n",
" 2 | \n",
"
\n",
" \n",
" 1287 | \n",
" 2662 | \n",
" 0.8668187260627747 | \n",
" 3 | \n",
"
\n",
" \n",
" 1287 | \n",
" 3366 | \n",
" 0.8548122048377991 | \n",
" 4 | \n",
"
\n",
" \n",
" 1287 | \n",
" 2948 | \n",
" 0.8543752431869507 | \n",
" 5 | \n",
"
\n",
" \n",
" 1287 | \n",
" 3062 | \n",
" 0.8494184017181396 | \n",
" 6 | \n",
"
\n",
" \n",
" 1287 | \n",
" 2947 | \n",
" 0.8432653546333313 | \n",
" 7 | \n",
"
\n",
" \n",
" 1287 | \n",
" 3836 | \n",
" 0.8384832739830017 | \n",
" 8 | \n",
"
\n",
" \n",
" 1287 | \n",
" 1304 | \n",
" 0.8308332562446594 | \n",
" 9 | \n",
"
\n",
" \n",
" 1287 | \n",
" 1250 | \n",
" 0.8267531394958496 | \n",
" 10 | \n",
"
\n",
"
\n",
"[10 rows x 4 columns]
\n",
"
"
],
"text/plain": [
"Columns:\n",
"\tmovie_id\tint\n",
"\tsimilar\tint\n",
"\tscore\tfloat\n",
"\trank\tint\n",
"\n",
"Rows: 10\n",
"\n",
"Data:\n",
"+----------+---------+--------------------+------+\n",
"| movie_id | similar | score | rank |\n",
"+----------+---------+--------------------+------+\n",
"| 1287 | 1262 | 0.8935538530349731 | 1 |\n",
"| 1287 | 1272 | 0.8684239983558655 | 2 |\n",
"| 1287 | 2662 | 0.8668187260627747 | 3 |\n",
"| 1287 | 3366 | 0.8548122048377991 | 4 |\n",
"| 1287 | 2948 | 0.8543752431869507 | 5 |\n",
"| 1287 | 3062 | 0.8494184017181396 | 6 |\n",
"| 1287 | 2947 | 0.8432653546333313 | 7 |\n",
"| 1287 | 3836 | 0.8384832739830017 | 8 |\n",
"| 1287 | 1304 | 0.8308332562446594 | 9 |\n",
"| 1287 | 1250 | 0.8267531394958496 | 10 |\n",
"+----------+---------+--------------------+------+\n",
"[10 rows x 4 columns]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m.get_similar_items([1287]) # movie_id is Ben-Hur"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"ExecuteTime": {
"end_time": "2018-05-05T06:45:15.507691Z",
"start_time": "2018-05-05T06:45:15.502035Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Help on method get_similar_items in module graphlab.toolkits.recommender.util:\n",
"\n",
"get_similar_items(self, items=None, k=10, verbose=False) method of graphlab.toolkits.recommender.ranking_factorization_recommender.RankingFactorizationRecommender instance\n",
" Get the k most similar items for each item in items.\n",
" \n",
" Each type of recommender has its own model for the similarity\n",
" between items. For example, the item_similarity_recommender will\n",
" return the most similar items according to the user-chosen\n",
" similarity; the factorization_recommender will return the\n",
" nearest items based on the cosine similarity between latent item\n",
" factors.\n",
" \n",
" Parameters\n",
" ----------\n",
" items : SArray or list; optional\n",
" An :class:`~graphlab.SArray` or list of item ids for which to get\n",
" similar items. If 'None', then return the `k` most similar items for\n",
" all items in the training set.\n",
" \n",
" k : int, optional\n",
" The number of similar items for each item.\n",
" \n",
" verbose : bool, optional\n",
" Progress printing is shown.\n",
" \n",
" Returns\n",
" -------\n",
" out : SFrame\n",
" A SFrame with the top ranked similar items for each item. The\n",
" columns `item`, 'similar', 'score' and 'rank', where\n",
" `item` matches the item column name specified at training time.\n",
" The 'rank' is between 1 and `k` and 'score' gives the similarity\n",
" score of that item. The value of the score depends on the method\n",
" used for computing item similarities.\n",
" \n",
" Examples\n",
" --------\n",
" \n",
" >>> sf = graphlab.SFrame({'user_id': [\"0\", \"0\", \"0\", \"1\", \"1\", \"2\", \"2\", \"2\"],\n",
" 'item_id': [\"a\", \"b\", \"c\", \"a\", \"b\", \"b\", \"c\", \"d\"]})\n",
" >>> m = graphlab.item_similarity_recommender.create(sf)\n",
" >>> nn = m.get_similar_items()\n",
"\n"
]
}
],
"source": [
"help(m.get_similar_items)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "subslide"
}
},
"source": [
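"In the table returned by `get_similar_items`, the `score` column gives the similarity score of each `similar` item with respect to the queried `movie_id`."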
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:25:06.875078Z",
"start_time": "2019-06-14T16:25:06.872421Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"# m.get_similar_items([1287]).join(items, on={'similar': 'movie_id'}).sort('rank')"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Making recommendations"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:25:10.176791Z",
"start_time": "2019-06-14T16:25:09.614271Z"
},
"slideshow": {
"slide_type": "fragment"
}
},
"outputs": [
{
"data": {
"text/html": [
"recommendations finished on 1000/6040 queries. users per second: 11685.2
"
],
"text/plain": [
"recommendations finished on 1000/6040 queries. users per second: 11685.2"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"recommendations finished on 2000/6040 queries. users per second: 11654.4
"
],
"text/plain": [
"recommendations finished on 2000/6040 queries. users per second: 11654.4"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"recommendations finished on 3000/6040 queries. users per second: 11658.6
"
],
"text/plain": [
"recommendations finished on 3000/6040 queries. users per second: 11658.6"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"recommendations finished on 4000/6040 queries. users per second: 11321.5
"
],
"text/plain": [
"recommendations finished on 4000/6040 queries. users per second: 11321.5"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"recommendations finished on 5000/6040 queries. users per second: 11502.9
"
],
"text/plain": [
"recommendations finished on 5000/6040 queries. users per second: 11502.9"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"recommendations finished on 6000/6040 queries. users per second: 11105.9
"
],
"text/plain": [
"recommendations finished on 6000/6040 queries. users per second: 11105.9"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"recs = m.recommend()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:25:12.909100Z",
"start_time": "2019-06-14T16:25:12.888333Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" user_id | \n",
" movie_id | \n",
" score | \n",
" rank | \n",
"
\n",
" \n",
" 1 | \n",
" 318 | \n",
" 5.045622686663793 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 1198 | \n",
" 4.862424055854009 | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 50 | \n",
" 4.76625474802606 | \n",
" 3 | \n",
"
\n",
" \n",
" 1 | \n",
" 593 | \n",
" 4.766107517102884 | \n",
" 4 | \n",
"
\n",
" \n",
" 1 | \n",
" 858 | \n",
" 4.747795152286218 | \n",
" 5 | \n",
"
\n",
" \n",
" 1 | \n",
" 1196 | \n",
" 4.689315832028316 | \n",
" 6 | \n",
"
\n",
" \n",
" 1 | \n",
" 2858 | \n",
" 4.678970253834652 | \n",
" 7 | \n",
"
\n",
" \n",
" 1 | \n",
" 2396 | \n",
" 4.5986619915758835 | \n",
" 8 | \n",
"
\n",
" \n",
" 1 | \n",
" 110 | \n",
" 4.588308471063303 | \n",
" 9 | \n",
"
\n",
" \n",
" 1 | \n",
" 2571 | \n",
" 4.573408636072801 | \n",
" 10 | \n",
"
\n",
"
\n",
"[60400 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n",
"
"
],
"text/plain": [
"Columns:\n",
"\tuser_id\tint\n",
"\tmovie_id\tint\n",
"\tscore\tfloat\n",
"\trank\tint\n",
"\n",
"Rows: 60400\n",
"\n",
"Data:\n",
"+---------+----------+--------------------+------+\n",
"| user_id | movie_id | score | rank |\n",
"+---------+----------+--------------------+------+\n",
"| 1 | 318 | 5.045622686663793 | 1 |\n",
"| 1 | 1198 | 4.862424055854009 | 2 |\n",
"| 1 | 50 | 4.76625474802606 | 3 |\n",
"| 1 | 593 | 4.766107517102884 | 4 |\n",
"| 1 | 858 | 4.747795152286218 | 5 |\n",
"| 1 | 1196 | 4.689315832028316 | 6 |\n",
"| 1 | 2858 | 4.678970253834652 | 7 |\n",
"| 1 | 2396 | 4.5986619915758835 | 8 |\n",
"| 1 | 110 | 4.588308471063303 | 9 |\n",
"| 1 | 2571 | 4.573408636072801 | 10 |\n",
"+---------+----------+--------------------+------+\n",
"[60400 rows x 4 columns]\n",
"Note: Only the head of the SFrame is printed.\n",
"You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns."
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"recs"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:25:17.489601Z",
"start_time": "2019-06-14T16:25:17.232492Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" user_id | \n",
" movie_id | \n",
" rating | \n",
" timestamp | \n",
"
\n",
" \n",
" 4 | \n",
" 3468 | \n",
" 5 | \n",
" 978294008 | \n",
"
\n",
" \n",
" 4 | \n",
" 1210 | \n",
" 3 | \n",
" 978293924 | \n",
"
\n",
" \n",
" 4 | \n",
" 2951 | \n",
" 4 | \n",
" 978294282 | \n",
"
\n",
" \n",
" 4 | \n",
" 1214 | \n",
" 4 | \n",
" 978294260 | \n",
"
\n",
" \n",
" 4 | \n",
" 1036 | \n",
" 4 | \n",
" 978294282 | \n",
"
\n",
" \n",
" 4 | \n",
" 260 | \n",
" 5 | \n",
" 978294199 | \n",
"
\n",
" \n",
" 4 | \n",
" 2028 | \n",
" 5 | \n",
" 978294230 | \n",
"
\n",
" \n",
" 4 | \n",
" 480 | \n",
" 4 | \n",
" 978294008 | \n",
"
\n",
" \n",
" 4 | \n",
" 1196 | \n",
" 2 | \n",
" 978294199 | \n",
"
\n",
" \n",
" 4 | \n",
" 1198 | \n",
" 5 | \n",
" 978294199 | \n",
"
\n",
"
\n",
"[? rows x 4 columns]
Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.
You can use sf.materialize() to force materialization.\n",
"
"
],
"text/plain": [
"Columns:\n",
"\tuser_id\tint\n",
"\tmovie_id\tint\n",
"\trating\tint\n",
"\ttimestamp\tint\n",
"\n",
"Rows: Unknown\n",
"\n",
"Data:\n",
"+---------+----------+--------+-----------+\n",
"| user_id | movie_id | rating | timestamp |\n",
"+---------+----------+--------+-----------+\n",
"| 4 | 3468 | 5 | 978294008 |\n",
"| 4 | 1210 | 3 | 978293924 |\n",
"| 4 | 2951 | 4 | 978294282 |\n",
"| 4 | 1214 | 4 | 978294260 |\n",
"| 4 | 1036 | 4 | 978294282 |\n",
"| 4 | 260 | 5 | 978294199 |\n",
"| 4 | 2028 | 5 | 978294230 |\n",
"| 4 | 480 | 4 | 978294008 |\n",
"| 4 | 1196 | 2 | 978294199 |\n",
"| 4 | 1198 | 5 | 978294199 |\n",
"+---------+----------+--------+-----------+\n",
"[? rows x 4 columns]\n",
"Note: Only the head of the SFrame is printed. This SFrame is lazily evaluated.\n",
"You can use sf.materialize() to force materialization."
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data[data['user_id'] == 4]"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:25:22.784880Z",
"start_time": "2019-06-14T16:25:22.782529Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"# m.recommend(users=[4], k=20).join(items, on='movie_id')"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Recommendations for new users"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:25:34.317252Z",
"start_time": "2019-06-14T16:25:34.296992Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" movie_id | \n",
" user_id | \n",
" rating | \n",
"
\n",
" \n",
" 30 | \n",
" 99999 | \n",
" 2 | \n",
"
\n",
" \n",
" 1000 | \n",
" 99999 | \n",
" 1 | \n",
"
\n",
" \n",
" 900 | \n",
" 99999 | \n",
" 3 | \n",
"
\n",
" \n",
" 883 | \n",
" 99999 | \n",
" 4 | \n",
"
\n",
" \n",
" 251 | \n",
" 99999 | \n",
" 0 | \n",
"
\n",
" \n",
" 200 | \n",
" 99999 | \n",
" 0 | \n",
"
\n",
" \n",
" 199 | \n",
" 99999 | \n",
" 1 | \n",
"
\n",
" \n",
" 180 | \n",
" 99999 | \n",
" 1 | \n",
"
\n",
" \n",
" 120 | \n",
" 99999 | \n",
" 1 | \n",
"
\n",
" \n",
" 991 | \n",
" 99999 | \n",
" 2 | \n",
"
\n",
"
\n",
"[11 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n",
"
"
],
"text/plain": [
"Columns:\n",
"\tmovie_id\tint\n",
"\tuser_id\tint\n",
"\trating\tint\n",
"\n",
"Rows: 11\n",
"\n",
"Data:\n",
"+----------+---------+--------+\n",
"| movie_id | user_id | rating |\n",
"+----------+---------+--------+\n",
"| 30 | 99999 | 2 |\n",
"| 1000 | 99999 | 1 |\n",
"| 900 | 99999 | 3 |\n",
"| 883 | 99999 | 4 |\n",
"| 251 | 99999 | 0 |\n",
"| 200 | 99999 | 0 |\n",
"| 199 | 99999 | 1 |\n",
"| 180 | 99999 | 1 |\n",
"| 120 | 99999 | 1 |\n",
"| 991 | 99999 | 2 |\n",
"+----------+---------+--------+\n",
"[11 rows x 3 columns]\n",
"Note: Only the head of the SFrame is printed.\n",
"You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns."
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"recent_data = tc.SFrame()\n",
"recent_data['movie_id'] = [30, 1000, 900, 883, 251, 200, 199, 180, 120, 991, 1212] \n",
"recent_data['user_id'] = 99999\n",
"recent_data['rating'] = [2, 1, 3, 4, 0, 0, 1, 1, 1, 2, 3]\n",
"recent_data"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"ExecuteTime": {
"end_time": "2019-06-14T16:25:45.789843Z",
"start_time": "2019-06-14T16:25:45.767211Z"
},
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" user_id | \n",
" movie_id | \n",
" score | \n",
" rank | \n",
"
\n",
" \n",
" 99999 | \n",
" 3881 | \n",
" 5.0 | \n",
" 1 | \n",
"
\n",
" \n",
" 99999 | \n",
" 3607 | \n",
" 5.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 99999 | \n",
" 1830 | \n",
" 5.0 | \n",
" 3 | \n",
"
\n",
" \n",
" 99999 | \n",
" 989 | \n",
" 5.0 | \n",
" 4 | \n",
"
\n",
" \n",
" 99999 | \n",
" 3172 | \n",
" 5.0 | \n",
" 5 | \n",
"
\n",
" \n",
" 99999 | \n",
" 3233 | \n",
" 5.0 | \n",
" 6 | \n",
"
\n",
" \n",
" 99999 | \n",
" 787 | \n",
" 5.0 | \n",
" 7 | \n",
"
\n",
" \n",
" 99999 | \n",
" 3382 | \n",
" 5.0 | \n",
" 8 | \n",
"
\n",
" \n",
" 99999 | \n",
" 3656 | \n",
" 5.0 | \n",
" 9 | \n",
"
\n",
" \n",
" 99999 | \n",
" 3280 | \n",
" 5.0 | \n",
" 10 | \n",
"
\n",
"
\n",
"[10 rows x 4 columns]
\n",
"
"
],
"text/plain": [
"Columns:\n",
"\tuser_id\tint\n",
"\tmovie_id\tint\n",
"\tscore\tfloat\n",
"\trank\tint\n",
"\n",
"Rows: 10\n",
"\n",
"Data:\n",
"+---------+----------+-------+------+\n",
"| user_id | movie_id | score | rank |\n",
"+---------+----------+-------+------+\n",
"| 99999 | 3881 | 5.0 | 1 |\n",
"| 99999 | 3607 | 5.0 | 2 |\n",
"| 99999 | 1830 | 5.0 | 3 |\n",
"| 99999 | 989 | 5.0 | 4 |\n",
"| 99999 | 3172 | 5.0 | 5 |\n",
"| 99999 | 3233 | 5.0 | 6 |\n",
"| 99999 | 787 | 5.0 | 7 |\n",
"| 99999 | 3382 | 5.0 | 8 |\n",
"| 99999 | 3656 | 5.0 | 9 |\n",
"| 99999 | 3280 | 5.0 | 10 |\n",
"+---------+----------+-------+------+\n",
"[10 rows x 4 columns]"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"m2.recommend(users=[99999], new_observation_data=recent_data)#.join(items, on='movie_id').sort('rank')"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"### Saving and loading models"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"m.save('my_model')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"# Reload the model written by m.save('my_model'); load_model is called on the turicreate module (imported as tc).\n",
"m_again = tc.load_model('my_model')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"m_again"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"celltoolbar": "Slideshow",
"kernelspec": {
"display_name": "Python [conda env:anaconda]",
"language": "python",
"name": "conda-env-anaconda-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.4"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
"autoclose": false,
"autocomplete": true,
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 0,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
},
"labels_anchors": false,
"latex_user_defs": false,
"report_style_numbering": false,
"user_envs_cfg": false
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": false,
"sideBar": false,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "475px",
"left": "920px",
"top": "140.384px",
"width": "230px"
},
"toc_section_display": false,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 1
}