{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "***\n", "***\n", "# 使用GraphLab进行音乐推荐\n", "***\n", "***\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:07:43.686726Z", "start_time": "2018-05-05T06:07:39.415455Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1525500461.log\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "This non-commercial license of GraphLab Create for academic use is assigned to wangchengjun@nju.edu.cn and will expire on March 14, 2019.\n" ] } ], "source": [ "import os\n", "\n", "import graphlab as gl\n", "# set canvas to show sframes and sgraphs in ipython notebook\n", "gl.canvas.set_target('ipynb')" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# 下载数据\n", "http://s3.amazonaws.com/dato-datasets/millionsong/10000.txt\n" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:12:27.320471Z", "start_time": "2018-05-05T06:12:24.105760Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idmusic_idrating
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOAKIMP12A8C1309951
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOBBMDR12A8C13253B2
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOBXHDL12A81C204C01
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOBYHAJ12A6701BF1D1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SODACBL12A8C13C2731
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SODDNQT12A6D4F5F7E5
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SODXRTY12AB0180F3B1
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOFGUAY12AB017B0A81
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOFRQTD12A81C233C01
b80344d063b5ccb3212f76538
f3d9e43d87dca9e ...
SOHQWYZ12A6D4FA7011
\n", "[2000000 rows x 3 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.\n", "
" ], "text/plain": [ "Columns:\n", "\tuser_id\tstr\n", "\tmusic_id\tstr\n", "\trating\tint\n", "\n", "Rows: 2000000\n", "\n", "Data:\n", "+-------------------------------+--------------------+--------+\n", "| user_id | music_id | rating |\n", "+-------------------------------+--------------------+--------+\n", "| b80344d063b5ccb3212f76538f... | SOAKIMP12A8C130995 | 1 |\n", "| b80344d063b5ccb3212f76538f... | SOBBMDR12A8C13253B | 2 |\n", "| b80344d063b5ccb3212f76538f... | SOBXHDL12A81C204C0 | 1 |\n", "| b80344d063b5ccb3212f76538f... | SOBYHAJ12A6701BF1D | 1 |\n", "| b80344d063b5ccb3212f76538f... | SODACBL12A8C13C273 | 1 |\n", "| b80344d063b5ccb3212f76538f... | SODDNQT12A6D4F5F7E | 5 |\n", "| b80344d063b5ccb3212f76538f... | SODXRTY12AB0180F3B | 1 |\n", "| b80344d063b5ccb3212f76538f... | SOFGUAY12AB017B0A8 | 1 |\n", "| b80344d063b5ccb3212f76538f... | SOFRQTD12A81C233C0 | 1 |\n", "| b80344d063b5ccb3212f76538f... | SOHQWYZ12A6D4FA701 | 1 |\n", "+-------------------------------+--------------------+--------+\n", "[2000000 rows x 3 columns]\n", "Note: Only the head of the SFrame is printed.\n", "You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns." 
] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Where to read the song-usage triples from, in priority order:\n", "#   1. the MILLIONSONG_10000 environment variable, when set and non-empty;\n", "#   2. the local copy this notebook was originally run against, when it exists;\n", "#   3. the public URL listed in the markdown cell above.\n", "# This keeps the notebook runnable on machines that lack the local path.\n", "LOCAL_TRAIN_FILE = '/Users/datalab/bigdata/cjc/millionsong/song_usage_10000.txt'\n", "REMOTE_TRAIN_FILE = 'http://s3.amazonaws.com/dato-datasets/millionsong/10000.txt'\n", "train_file = os.environ.get('MILLIONSONG_10000') or (\n", "    LOCAL_TRAIN_FILE if os.path.exists(LOCAL_TRAIN_FILE) else REMOTE_TRAIN_FILE)\n", "\n", "# The file is read as tab-separated with no header row; the columns arrive as\n", "# X1..X3 and are renamed on sf itself. The renamed frame is the cell's output.\n", "sf = gl.SFrame.read_csv(train_file, header=False, delimiter='\\t', verbose=False)\n", "sf.rename({'X1':'user_id', 'X2':'music_id', 'X3':'rating'})" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:13:05.972248Z", "start_time": "2018-05-05T06:13:05.961389Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "train_set, test_set = sf.random_split(0.8, seed=1)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:13:29.312653Z", "start_time": "2018-05-05T06:13:27.066021Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
Recsys training: model = popularity
" ], "text/plain": [ "Recsys training: model = popularity" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Preparing data set.
" ], "text/plain": [ "Preparing data set." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    Data has 1599753 observations with 76085 users and 10000 items.
" ], "text/plain": [ " Data has 1599753 observations with 76085 users and 10000 items." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    Data prepared in: 1.2441s
" ], "text/plain": [ " Data prepared in: 1.2441s" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
1599753 observations to process; with 10000 unique items.
" ], "text/plain": [ "1599753 observations to process; with 10000 unique items." ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Fit GraphLab's popularity recommender on the training split, with the\n", "# 'rating' column as the target.\n", "popularity_model = gl.popularity_recommender.create(\n", "    train_set,\n", "    user_id='user_id',\n", "    item_id='music_id',\n", "    target='rating')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:14:56.020548Z", "start_time": "2018-05-05T06:14:53.721427Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
Recsys training: model = item_similarity
" ], "text/plain": [ "Recsys training: model = item_similarity" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Preparing data set.
" ], "text/plain": [ "Preparing data set." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    Data has 1599753 observations with 76085 users and 10000 items.
" ], "text/plain": [ " Data has 1599753 observations with 76085 users and 10000 items." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    Data prepared in: 1.18984s
" ], "text/plain": [ " Data prepared in: 1.18984s" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Training model from provided data.
" ], "text/plain": [ "Training model from provided data." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Gathering per-item and per-user statistics.
" ], "text/plain": [ "Gathering per-item and per-user statistics." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+------------+
" ], "text/plain": [ "+--------------------------------+------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Elapsed Time (Item Statistics) | % Complete |
" ], "text/plain": [ "| Elapsed Time (Item Statistics) | % Complete |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+------------+
" ], "text/plain": [ "+--------------------------------+------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 1.549ms                        | 1.25       |
" ], "text/plain": [ "| 1.549ms | 1.25 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 46.313ms                       | 100        |
" ], "text/plain": [ "| 46.313ms | 100 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+------------+
" ], "text/plain": [ "+--------------------------------+------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Setting up lookup tables.
" ], "text/plain": [ "Setting up lookup tables." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Processing data in one pass using dense lookup tables.
" ], "text/plain": [ "Processing data in one pass using dense lookup tables." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+-------------------------------------+------------------+-----------------+
" ], "text/plain": [ "+-------------------------------------+------------------+-----------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |
" ], "text/plain": [ "| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+-------------------------------------+------------------+-----------------+
" ], "text/plain": [ "+-------------------------------------+------------------+-----------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 264.127ms                           | 0                | 0               |
" ], "text/plain": [ "| 264.127ms | 0 | 0 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 994.743ms                           | 100              | 10000           |
" ], "text/plain": [ "| 994.743ms | 100 | 10000 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+-------------------------------------+------------------+-----------------+
" ], "text/plain": [ "+-------------------------------------+------------------+-----------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Finalizing lookup tables.
" ], "text/plain": [ "Finalizing lookup tables." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Generating candidate set for working with new users.
" ], "text/plain": [ "Generating candidate set for working with new users." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Finished training in 1.06117s
" ], "text/plain": [ "Finished training in 1.06117s" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Fit an item-similarity recommender on the same training split, using\n", "# cosine similarity and 'rating' as the target.\n", "item_sim_model = gl.item_similarity_recommender.create(\n", "    train_set,\n", "    user_id='user_id',\n", "    item_id='music_id',\n", "    target='rating',\n", "    similarity_type='cosine')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:15:27.619180Z", "start_time": "2018-05-05T06:15:17.795671Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
Recsys training: model = factorization_recommender
" ], "text/plain": [ "Recsys training: model = factorization_recommender" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Preparing data set.
" ], "text/plain": [ "Preparing data set." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    Data has 1599753 observations with 76085 users and 10000 items.
" ], "text/plain": [ " Data has 1599753 observations with 76085 users and 10000 items." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
    Data prepared in: 1.21379s
" ], "text/plain": [ " Data prepared in: 1.21379s" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Training factorization_recommender for recommendations.
" ], "text/plain": [ "Training factorization_recommender for recommendations." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+--------------------------------------------------+----------+
" ], "text/plain": [ "+--------------------------------+--------------------------------------------------+----------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Parameter                      | Description                                      | Value    |
" ], "text/plain": [ "| Parameter | Description | Value |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+--------------------------------------------------+----------+
" ], "text/plain": [ "+--------------------------------+--------------------------------------------------+----------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| num_factors                    | Factor Dimension                                 | 8        |
" ], "text/plain": [ "| num_factors | Factor Dimension | 8 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| regularization                 | L2 Regularization on Factors                     | 1e-08    |
" ], "text/plain": [ "| regularization | L2 Regularization on Factors | 1e-08 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| solver                         | Solver used for training                         | sgd      |
" ], "text/plain": [ "| solver | Solver used for training | sgd |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| linear_regularization          | L2 Regularization on Linear Coefficients         | 1e-10    |
" ], "text/plain": [ "| linear_regularization | L2 Regularization on Linear Coefficients | 1e-10 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| max_iterations                 | Maximum Number of Iterations                     | 50       |
" ], "text/plain": [ "| max_iterations | Maximum Number of Iterations | 50 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+--------------------------------------------------+----------+
" ], "text/plain": [ "+--------------------------------+--------------------------------------------------+----------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
  Optimizing model using SGD; tuning step size.
" ], "text/plain": [ " Optimizing model using SGD; tuning step size." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
  Using 199969 / 1599753 points for tuning the step size.
" ], "text/plain": [ " Using 199969 / 1599753 points for tuning the step size." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+
" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Attempt | Initial Step Size | Estimated Objective Value                |
" ], "text/plain": [ "| Attempt | Initial Step Size | Estimated Objective Value |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+
" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 0       | 25                | No Decrease (227.564 >= 37.2517)         |
" ], "text/plain": [ "| 0 | 25 | No Decrease (227.564 >= 37.2517) |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 1       | 6.25              | No Decrease (218.774 >= 37.2517)         |
" ], "text/plain": [ "| 1 | 6.25 | No Decrease (218.774 >= 37.2517) |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 2       | 1.5625            | No Decrease (189.223 >= 37.2517)         |
" ], "text/plain": [ "| 2 | 1.5625 | No Decrease (189.223 >= 37.2517) |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 3       | 0.390625          | No Decrease (84.7797 >= 37.2517)         |
" ], "text/plain": [ "| 3 | 0.390625 | No Decrease (84.7797 >= 37.2517) |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 4       | 0.0976562         | 12.0849                                  |
" ], "text/plain": [ "| 4 | 0.0976562 | 12.0849 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 5       | 0.0488281         | 8.26182                                  |
" ], "text/plain": [ "| 5 | 0.0488281 | 8.26182 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 6       | 0.0244141         | 21.5168                                  |
" ], "text/plain": [ "| 6 | 0.0244141 | 21.5168 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+
" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Final   | 0.0488281         | 8.26182                                  |
" ], "text/plain": [ "| Final | 0.0488281 | 8.26182 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+
" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Starting Optimization.
" ], "text/plain": [ "Starting Optimization." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+
" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Iter.   | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size   |
" ], "text/plain": [ "| Iter. | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+
" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Initial | 72us         | 43.795            | 6.61778               |             |
" ], "text/plain": [ "| Initial | 72us | 43.795 | 6.61778 | |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+
" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 1       | 164.211ms    | 43.5009           | 6.59512               | 0.0488281   |
" ], "text/plain": [ "| 1 | 164.211ms | 43.5009 | 6.59512 | 0.0488281 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 2       | 310.478ms    | 40.8579           | 6.39166               | 0.0290334   |
" ], "text/plain": [ "| 2 | 310.478ms | 40.8579 | 6.39166 | 0.0290334 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 3       | 428.164ms    | 37.9236           | 6.15785               | 0.0214205   |
" ], "text/plain": [ "| 3 | 428.164ms | 37.9236 | 6.15785 | 0.0214205 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 4       | 543.402ms    | 35.1185           | 5.92569               | 0.0172633   |
" ], "text/plain": [ "| 4 | 543.402ms | 35.1185 | 5.92569 | 0.0172633 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 5       | 661.456ms    | 32.6788           | 5.7161                | 0.014603    |
" ], "text/plain": [ "| 5 | 661.456ms | 32.6788 | 5.7161 | 0.014603 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 6       | 784.067ms    | 30.599            | 5.53115               | 0.0127367   |
" ], "text/plain": [ "| 6 | 784.067ms | 30.599 | 5.53115 | 0.0127367 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 10      | 1.25s        | 24.7663           | 4.97592               | 0.008683    |
" ], "text/plain": [ "| 10 | 1.25s | 24.7663 | 4.97592 | 0.008683 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 11      | 1.38s        | 23.5067           | 4.84768               | 0.00808399  |
" ], "text/plain": [ "| 11 | 1.38s | 23.5067 | 4.84768 | 0.00808399 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 20      | 2.49s        | 17.6493           | 4.20017               | 0.00516295  |
" ], "text/plain": [ "| 20 | 2.49s | 17.6493 | 4.20017 | 0.00516295 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 30      | 3.65s        | 14.3453           | 3.78639               | 0.00380916  |
" ], "text/plain": [ "| 30 | 3.65s | 14.3453 | 3.78639 | 0.00380916 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 40      | 4.88s        | 12.6728           | 3.55862               | 0.00306991  |
" ], "text/plain": [ "| 40 | 4.88s | 12.6728 | 3.55862 | 0.00306991 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 50      | 6.13s        | 11.2253           | 3.34901               | 0.00218366  |
" ], "text/plain": [ "| 50 | 6.13s | 11.2253 | 3.34901 | 0.00218366 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+
" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Optimization Complete: Maximum number of passes through the data reached.
" ], "text/plain": [ "Optimization Complete: Maximum number of passes through the data reached." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Computing final objective value and training RMSE.
" ], "text/plain": [ "Computing final objective value and training RMSE." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
       Final objective value: 9.81513
" ], "text/plain": [ " Final objective value: 9.81513" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
       Final training RMSE: 3.1314
" ], "text/plain": [ " Final training RMSE: 3.1314" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Fit a factorization recommender on the training split with the library's\n", "# default hyper-parameters and 'rating' as the target.\n", "# NOTE(review): despite the variable name this is a factorization_recommender;\n", "# the name is kept as-is in case cells further down refer to it.\n", "factorization_machine_model = gl.recommender.factorization_recommender.create(\n", "    train_set,\n", "    user_id='user_id',\n", "    item_id='music_id',\n", "    target='rating')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:23:07.088596Z", "start_time": "2018-05-05T06:23:07.080111Z" } }, "outputs": [ { "data": { "text/plain": [ "1599753" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(train_set)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:22:12.197893Z", "start_time": "2018-05-05T06:21:47.643985Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "compare_models: using 34355 users to estimate model performance\n", "PROGRESS: Evaluate model M0\n" ] }, { "data": { "text/html": [ "
recommendations finished on 1000/34355 queries. users per second: 18058.4
" ], "text/plain": [ "recommendations finished on 1000/34355 queries. users per second: 18058.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 2000/34355 queries. users per second: 21323.8
" ], "text/plain": [ "recommendations finished on 2000/34355 queries. users per second: 21323.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 3000/34355 queries. users per second: 22068.6
" ], "text/plain": [ "recommendations finished on 3000/34355 queries. users per second: 22068.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 4000/34355 queries. users per second: 22236.7
" ], "text/plain": [ "recommendations finished on 4000/34355 queries. users per second: 22236.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 5000/34355 queries. users per second: 22389.4
" ], "text/plain": [ "recommendations finished on 5000/34355 queries. users per second: 22389.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 6000/34355 queries. users per second: 22147
" ], "text/plain": [ "recommendations finished on 6000/34355 queries. users per second: 22147" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 7000/34355 queries. users per second: 22349.2
" ], "text/plain": [ "recommendations finished on 7000/34355 queries. users per second: 22349.2" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 8000/34355 queries. users per second: 22358.7
" ], "text/plain": [ "recommendations finished on 8000/34355 queries. users per second: 22358.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 9000/34355 queries. users per second: 22426.9
" ], "text/plain": [ "recommendations finished on 9000/34355 queries. users per second: 22426.9" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 10000/34355 queries. users per second: 22486.8
" ], "text/plain": [ "recommendations finished on 10000/34355 queries. users per second: 22486.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 11000/34355 queries. users per second: 22207.4
" ], "text/plain": [ "recommendations finished on 11000/34355 queries. users per second: 22207.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 12000/34355 queries. users per second: 22163.2
" ], "text/plain": [ "recommendations finished on 12000/34355 queries. users per second: 22163.2" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 13000/34355 queries. users per second: 22060.5
" ], "text/plain": [ "recommendations finished on 13000/34355 queries. users per second: 22060.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 14000/34355 queries. users per second: 22038
" ], "text/plain": [ "recommendations finished on 14000/34355 queries. users per second: 22038" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 15000/34355 queries. users per second: 22088.7
" ], "text/plain": [ "recommendations finished on 15000/34355 queries. users per second: 22088.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 16000/34355 queries. users per second: 22082.8
" ], "text/plain": [ "recommendations finished on 16000/34355 queries. users per second: 22082.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 17000/34355 queries. users per second: 22162.7
" ], "text/plain": [ "recommendations finished on 17000/34355 queries. users per second: 22162.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 18000/34355 queries. users per second: 22229.9
" ], "text/plain": [ "recommendations finished on 18000/34355 queries. users per second: 22229.9" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 19000/34355 queries. users per second: 22293.3
" ], "text/plain": [ "recommendations finished on 19000/34355 queries. users per second: 22293.3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 20000/34355 queries. users per second: 22182.1
" ], "text/plain": [ "recommendations finished on 20000/34355 queries. users per second: 22182.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 21000/34355 queries. users per second: 22132.2
" ], "text/plain": [ "recommendations finished on 21000/34355 queries. users per second: 22132.2" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 22000/34355 queries. users per second: 22136.5
" ], "text/plain": [ "recommendations finished on 22000/34355 queries. users per second: 22136.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 23000/34355 queries. users per second: 21993.7
" ], "text/plain": [ "recommendations finished on 23000/34355 queries. users per second: 21993.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 24000/34355 queries. users per second: 21916.9
" ], "text/plain": [ "recommendations finished on 24000/34355 queries. users per second: 21916.9" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 25000/34355 queries. users per second: 21920.7
" ], "text/plain": [ "recommendations finished on 25000/34355 queries. users per second: 21920.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 26000/34355 queries. users per second: 21987.5
" ], "text/plain": [ "recommendations finished on 26000/34355 queries. users per second: 21987.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 27000/34355 queries. users per second: 21954.6
" ], "text/plain": [ "recommendations finished on 27000/34355 queries. users per second: 21954.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 28000/34355 queries. users per second: 22011.1
" ], "text/plain": [ "recommendations finished on 28000/34355 queries. users per second: 22011.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 29000/34355 queries. users per second: 22063.8
" ], "text/plain": [ "recommendations finished on 29000/34355 queries. users per second: 22063.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 30000/34355 queries. users per second: 22121.1
" ], "text/plain": [ "recommendations finished on 30000/34355 queries. users per second: 22121.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 31000/34355 queries. users per second: 22215.6
" ], "text/plain": [ "recommendations finished on 31000/34355 queries. users per second: 22215.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 32000/34355 queries. users per second: 22137.3
" ], "text/plain": [ "recommendations finished on 32000/34355 queries. users per second: 22137.3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 33000/34355 queries. users per second: 22185.3
" ], "text/plain": [ "recommendations finished on 33000/34355 queries. users per second: 22185.3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 34000/34355 queries. users per second: 22219.4
" ], "text/plain": [ "recommendations finished on 34000/34355 queries. users per second: 22219.4" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Precision and recall summary statistics by cutoff\n", "+--------+-------------------+-------------------+\n", "| cutoff | mean_precision | mean_recall |\n", "+--------+-------------------+-------------------+\n", "| 1 | 0.000320186290205 | 2.62069953527e-05 |\n", "| 2 | 0.000363848057051 | 0.000157978795732 |\n", "| 3 | 0.000368699364479 | 0.000246448831037 |\n", "| 4 | 0.000451171590744 | 0.00045461967667 |\n", "| 5 | 0.000518119633241 | 0.000646663196468 |\n", "| 6 | 0.000499684665017 | 0.000719981467646 |\n", "| 7 | 0.000478200303553 | 0.000800163591435 |\n", "| 8 | 0.000451171590744 | 0.000835628106468 |\n", "| 9 | 0.000501301767493 | 0.000996939435667 |\n", "| 10 | 0.000474457866395 | 0.00104312051081 |\n", "+--------+-------------------+-------------------+\n", "[10 rows x 3 columns]\n", "\n", "('\\nOverall RMSE: ', 6.470445808584627)\n", "\n", "Per User RMSE (best)\n", "+-------------------------------+-------+------+\n", "| user_id | count | rmse |\n", "+-------------------------------+-------+------+\n", "| 6d61c9b3678aa6c015ea9fd502... | 1 | 0.0 |\n", "+-------------------------------+-------+------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per User RMSE (worst)\n", "+-------------------------------+-------+---------------+\n", "| user_id | count | rmse |\n", "+-------------------------------+-------+---------------+\n", "| 50996bbabb6f7857bf0c801943... 
| 2 | 647.013311924 |\n", "+-------------------------------+-------+---------------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (best)\n", "+--------------------+-------+-----------------+\n", "| music_id | count | rmse |\n", "+--------------------+-------+-----------------+\n", "| SOXDPFW12A81C2319B | 8 | 0.0735294117647 |\n", "+--------------------+-------+-----------------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (worst)\n", "+--------------------+-------+---------------+\n", "| music_id | count | rmse |\n", "+--------------------+-------+---------------+\n", "| SOUAGPQ12A8AE47B3A | 8 | 323.517367637 |\n", "+--------------------+-------+---------------+\n", "[1 rows x 3 columns]\n", "\n", "PROGRESS: Evaluate model M1\n" ] }, { "data": { "text/html": [ "
recommendations finished on 1000/34355 queries. users per second: 17896.8
" ], "text/plain": [ "recommendations finished on 1000/34355 queries. users per second: 17896.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 2000/34355 queries. users per second: 20141.8
" ], "text/plain": [ "recommendations finished on 2000/34355 queries. users per second: 20141.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 3000/34355 queries. users per second: 21115
" ], "text/plain": [ "recommendations finished on 3000/34355 queries. users per second: 21115" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 4000/34355 queries. users per second: 21548.9
" ], "text/plain": [ "recommendations finished on 4000/34355 queries. users per second: 21548.9" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 5000/34355 queries. users per second: 22269.8
" ], "text/plain": [ "recommendations finished on 5000/34355 queries. users per second: 22269.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 6000/34355 queries. users per second: 22353.6
" ], "text/plain": [ "recommendations finished on 6000/34355 queries. users per second: 22353.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 7000/34355 queries. users per second: 22388.9
" ], "text/plain": [ "recommendations finished on 7000/34355 queries. users per second: 22388.9" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 8000/34355 queries. users per second: 22353.4
" ], "text/plain": [ "recommendations finished on 8000/34355 queries. users per second: 22353.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 9000/34355 queries. users per second: 22462.1
" ], "text/plain": [ "recommendations finished on 9000/34355 queries. users per second: 22462.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 10000/34355 queries. users per second: 22408.8
" ], "text/plain": [ "recommendations finished on 10000/34355 queries. users per second: 22408.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 11000/34355 queries. users per second: 21999.6
" ], "text/plain": [ "recommendations finished on 11000/34355 queries. users per second: 21999.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 12000/34355 queries. users per second: 22154.4
" ], "text/plain": [ "recommendations finished on 12000/34355 queries. users per second: 22154.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 13000/34355 queries. users per second: 22417.7
" ], "text/plain": [ "recommendations finished on 13000/34355 queries. users per second: 22417.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 14000/34355 queries. users per second: 22434.6
" ], "text/plain": [ "recommendations finished on 14000/34355 queries. users per second: 22434.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 15000/34355 queries. users per second: 22602.1
" ], "text/plain": [ "recommendations finished on 15000/34355 queries. users per second: 22602.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 16000/34355 queries. users per second: 22675.6
" ], "text/plain": [ "recommendations finished on 16000/34355 queries. users per second: 22675.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 17000/34355 queries. users per second: 22789.5
" ], "text/plain": [ "recommendations finished on 17000/34355 queries. users per second: 22789.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 18000/34355 queries. users per second: 22858.7
" ], "text/plain": [ "recommendations finished on 18000/34355 queries. users per second: 22858.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 19000/34355 queries. users per second: 22980.9
" ], "text/plain": [ "recommendations finished on 19000/34355 queries. users per second: 22980.9" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 20000/34355 queries. users per second: 23016.8
" ], "text/plain": [ "recommendations finished on 20000/34355 queries. users per second: 23016.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 21000/34355 queries. users per second: 23015.3
" ], "text/plain": [ "recommendations finished on 21000/34355 queries. users per second: 23015.3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 22000/34355 queries. users per second: 23089
" ], "text/plain": [ "recommendations finished on 22000/34355 queries. users per second: 23089" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 23000/34355 queries. users per second: 23022.7
" ], "text/plain": [ "recommendations finished on 23000/34355 queries. users per second: 23022.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 24000/34355 queries. users per second: 23052.6
" ], "text/plain": [ "recommendations finished on 24000/34355 queries. users per second: 23052.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 25000/34355 queries. users per second: 22970.8
" ], "text/plain": [ "recommendations finished on 25000/34355 queries. users per second: 22970.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 26000/34355 queries. users per second: 22781.5
" ], "text/plain": [ "recommendations finished on 26000/34355 queries. users per second: 22781.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 27000/34355 queries. users per second: 22733.5
" ], "text/plain": [ "recommendations finished on 27000/34355 queries. users per second: 22733.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 28000/34355 queries. users per second: 22789.1
" ], "text/plain": [ "recommendations finished on 28000/34355 queries. users per second: 22789.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 29000/34355 queries. users per second: 22741.9
" ], "text/plain": [ "recommendations finished on 29000/34355 queries. users per second: 22741.9" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 30000/34355 queries. users per second: 22669.3
" ], "text/plain": [ "recommendations finished on 30000/34355 queries. users per second: 22669.3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 31000/34355 queries. users per second: 22684
" ], "text/plain": [ "recommendations finished on 31000/34355 queries. users per second: 22684" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 32000/34355 queries. users per second: 22677.7
" ], "text/plain": [ "recommendations finished on 32000/34355 queries. users per second: 22677.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 33000/34355 queries. users per second: 22675.2
" ], "text/plain": [ "recommendations finished on 33000/34355 queries. users per second: 22675.2" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 34000/34355 queries. users per second: 22616.2
" ], "text/plain": [ "recommendations finished on 34000/34355 queries. users per second: 22616.2" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Precision and recall summary statistics by cutoff\n", "+--------+-----------------+-----------------+\n", "| cutoff | mean_precision | mean_recall |\n", "+--------+-----------------+-----------------+\n", "| 1 | 0.0505894338524 | 0.0151970338993 |\n", "| 2 | 0.0616795226313 | 0.0336650217808 |\n", "| 3 | 0.0729927715519 | 0.0543725425713 |\n", "| 4 | 0.0751055159365 | 0.0704135431553 |\n", "| 5 | 0.0741726095183 | 0.0842718589105 |\n", "| 6 | 0.0725076408092 | 0.0963338861128 |\n", "| 7 | 0.0700792149198 | 0.106287905834 |\n", "| 8 | 0.0680978023577 | 0.115967909942 |\n", "| 9 | 0.0657740261 | 0.124363659149 |\n", "| 10 | 0.0636239266482 | 0.13230804769 |\n", "+--------+-----------------+-----------------+\n", "[10 rows x 3 columns]\n", "\n", "('\\nOverall RMSE: ', 7.150718576843326)\n", "\n", "Per User RMSE (best)\n", "+-------------------------------+-------+-------------------+\n", "| user_id | count | rmse |\n", "+-------------------------------+-------+-------------------+\n", "| dad5cd4678a6f6df34932432bc... | 1 | 0.000917145184108 |\n", "+-------------------------------+-------+-------------------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per User RMSE (worst)\n", "+-------------------------------+-------+---------------+\n", "| user_id | count | rmse |\n", "+-------------------------------+-------+---------------+\n", "| 50996bbabb6f7857bf0c801943... 
| 2 | 650.121367005 |\n", "+-------------------------------+-------+---------------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (best)\n", "+--------------------+-------+---------------+\n", "| music_id | count | rmse |\n", "+--------------------+-------+---------------+\n", "| SOJUKCL12A6D4F7DF7 | 3 | 0.75550628309 |\n", "+--------------------+-------+---------------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (worst)\n", "+--------------------+-------+---------------+\n", "| music_id | count | rmse |\n", "+--------------------+-------+---------------+\n", "| SOUAGPQ12A8AE47B3A | 8 | 325.077941681 |\n", "+--------------------+-------+---------------+\n", "[1 rows x 3 columns]\n", "\n", "PROGRESS: Evaluate model M2\n" ] }, { "data": { "text/html": [ "
recommendations finished on 1000/34355 queries. users per second: 16771.2
" ], "text/plain": [ "recommendations finished on 1000/34355 queries. users per second: 16771.2" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 2000/34355 queries. users per second: 18825.8
" ], "text/plain": [ "recommendations finished on 2000/34355 queries. users per second: 18825.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 3000/34355 queries. users per second: 19420.1
" ], "text/plain": [ "recommendations finished on 3000/34355 queries. users per second: 19420.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 4000/34355 queries. users per second: 20088.8
" ], "text/plain": [ "recommendations finished on 4000/34355 queries. users per second: 20088.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 5000/34355 queries. users per second: 20395.8
" ], "text/plain": [ "recommendations finished on 5000/34355 queries. users per second: 20395.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 6000/34355 queries. users per second: 20666.2
" ], "text/plain": [ "recommendations finished on 6000/34355 queries. users per second: 20666.2" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 7000/34355 queries. users per second: 20774.3
" ], "text/plain": [ "recommendations finished on 7000/34355 queries. users per second: 20774.3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 8000/34355 queries. users per second: 20775.1
" ], "text/plain": [ "recommendations finished on 8000/34355 queries. users per second: 20775.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 9000/34355 queries. users per second: 20780.4
" ], "text/plain": [ "recommendations finished on 9000/34355 queries. users per second: 20780.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 10000/34355 queries. users per second: 20950.3
" ], "text/plain": [ "recommendations finished on 10000/34355 queries. users per second: 20950.3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 11000/34355 queries. users per second: 20620.6
" ], "text/plain": [ "recommendations finished on 11000/34355 queries. users per second: 20620.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 12000/34355 queries. users per second: 20654.8
" ], "text/plain": [ "recommendations finished on 12000/34355 queries. users per second: 20654.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 13000/34355 queries. users per second: 20770.8
" ], "text/plain": [ "recommendations finished on 13000/34355 queries. users per second: 20770.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 14000/34355 queries. users per second: 20732.1
" ], "text/plain": [ "recommendations finished on 14000/34355 queries. users per second: 20732.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 15000/34355 queries. users per second: 20729.6
" ], "text/plain": [ "recommendations finished on 15000/34355 queries. users per second: 20729.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 16000/34355 queries. users per second: 20752.5
" ], "text/plain": [ "recommendations finished on 16000/34355 queries. users per second: 20752.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 17000/34355 queries. users per second: 20774.8
" ], "text/plain": [ "recommendations finished on 17000/34355 queries. users per second: 20774.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 18000/34355 queries. users per second: 20801.7
" ], "text/plain": [ "recommendations finished on 18000/34355 queries. users per second: 20801.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 19000/34355 queries. users per second: 20786.5
" ], "text/plain": [ "recommendations finished on 19000/34355 queries. users per second: 20786.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 20000/34355 queries. users per second: 20808.1
" ], "text/plain": [ "recommendations finished on 20000/34355 queries. users per second: 20808.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 21000/34355 queries. users per second: 20682
" ], "text/plain": [ "recommendations finished on 21000/34355 queries. users per second: 20682" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 22000/34355 queries. users per second: 20684.4
" ], "text/plain": [ "recommendations finished on 22000/34355 queries. users per second: 20684.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 23000/34355 queries. users per second: 20683.2
" ], "text/plain": [ "recommendations finished on 23000/34355 queries. users per second: 20683.2" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 24000/34355 queries. users per second: 20687.9
" ], "text/plain": [ "recommendations finished on 24000/34355 queries. users per second: 20687.9" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 25000/34355 queries. users per second: 20722.5
" ], "text/plain": [ "recommendations finished on 25000/34355 queries. users per second: 20722.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 26000/34355 queries. users per second: 20771.3
" ], "text/plain": [ "recommendations finished on 26000/34355 queries. users per second: 20771.3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 27000/34355 queries. users per second: 20842.5
" ], "text/plain": [ "recommendations finished on 27000/34355 queries. users per second: 20842.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 28000/34355 queries. users per second: 20878
" ], "text/plain": [ "recommendations finished on 28000/34355 queries. users per second: 20878" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 29000/34355 queries. users per second: 20863.7
" ], "text/plain": [ "recommendations finished on 29000/34355 queries. users per second: 20863.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 30000/34355 queries. users per second: 20824.5
" ], "text/plain": [ "recommendations finished on 30000/34355 queries. users per second: 20824.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 31000/34355 queries. users per second: 20777.4
" ], "text/plain": [ "recommendations finished on 31000/34355 queries. users per second: 20777.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 32000/34355 queries. users per second: 20721.2
" ], "text/plain": [ "recommendations finished on 32000/34355 queries. users per second: 20721.2" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 33000/34355 queries. users per second: 20763
" ], "text/plain": [ "recommendations finished on 33000/34355 queries. users per second: 20763" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 34000/34355 queries. users per second: 20792.3
" ], "text/plain": [ "recommendations finished on 34000/34355 queries. users per second: 20792.3" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Precision and recall summary statistics by cutoff\n", "+--------+-------------------+-------------------+\n", "| cutoff | mean_precision | mean_recall |\n", "+--------+-------------------+-------------------+\n", "| 1 | 0.000436617668462 | 8.84193897883e-05 |\n", "| 2 | 0.000392955901615 | 0.000146983613211 |\n", "| 3 | 0.000397807209043 | 0.000273528343889 |\n", "| 4 | 0.000400232862757 | 0.000328267207427 |\n", "| 5 | 0.000424974530636 | 0.00040585165237 |\n", "| 6 | 0.000460874205598 | 0.000574532683748 |\n", "| 7 | 0.00049483335759 | 0.000721448772082 |\n", "| 8 | 0.000531218163295 | 0.000887467388127 |\n", "| 9 | 0.00055628325167 | 0.00109888572439 |\n", "| 10 | 0.000593800029108 | 0.00130662212516 |\n", "+--------+-------------------+-------------------+\n", "[10 rows x 3 columns]\n", "\n", "('\\nOverall RMSE: ', 8.111262615677196)\n", "\n", "Per User RMSE (best)\n", "+-------------------------------+-------+-------------------+\n", "| user_id | count | rmse |\n", "+-------------------------------+-------+-------------------+\n", "| 06aad545e9390a6332a7fee7f3... | 1 | 0.000536064571246 |\n", "+-------------------------------+-------+-------------------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per User RMSE (worst)\n", "+-------------------------------+-------+---------------+\n", "| user_id | count | rmse |\n", "+-------------------------------+-------+---------------+\n", "| 50996bbabb6f7857bf0c801943... 
| 2 | 689.967975368 |\n", "+-------------------------------+-------+---------------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (best)\n", "+--------------------+-------+-----------------+\n", "| music_id | count | rmse |\n", "+--------------------+-------+-----------------+\n", "| SOJWSGL12A81C217B0 | 2 | 0.0503145302263 |\n", "+--------------------+-------+-----------------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (worst)\n", "+--------------------+-------+---------------+\n", "| music_id | count | rmse |\n", "+--------------------+-------+---------------+\n", "| SOUAGPQ12A8AE47B3A | 8 | 345.207580713 |\n", "+--------------------+-------+---------------+\n", "[1 rows x 3 columns]\n", "\n" ] } ], "source": [ "result = gl.recommender.util.compare_models(test_set, \n", " [popularity_model, item_sim_model, factorization_machine_model],\n", " user_sample=.5, skip_set=train_set)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:09:56.940553Z", "start_time": "2018-05-05T06:09:56.453602Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "K = 10\n", "users = gl.SArray(sf['user_id'].unique().head(100))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2018-05-05T06:09:57.774575Z", "start_time": "2018-05-05T06:09:57.694466Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_idmusic_idscorerank
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOXUQNR12AF72A69D63.022422651451
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOUFAZA12AC3DFAB201.336842775342
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOSFSTC12A8C1412191.091982126243
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOVIWFP12A58A7D1BD1.045163869864
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOBMTQD12AB01833D01.029451688135
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOCMNRG12AB0189D3F0.9756437937426
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOXOHUM12A67ADC8260.9506873289747
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOWBFVW12A6D4F612B0.9092370669058
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOXFYTY127E9433E7D0.8977278073639
279292bb36dbfc7f505e36ebf
038c81eb1d1d63e ...
SOYBLYP12A58A79D320.89709281921410
\n", "[10 rows x 4 columns]
\n", "
" ], "text/plain": [ "Columns:\n", "\tuser_id\tstr\n", "\tmusic_id\tstr\n", "\tscore\tfloat\n", "\trank\tint\n", "\n", "Rows: 10\n", "\n", "Data:\n", "+-------------------------------+--------------------+----------------+------+\n", "| user_id | music_id | score | rank |\n", "+-------------------------------+--------------------+----------------+------+\n", "| 279292bb36dbfc7f505e36ebf0... | SOXUQNR12AF72A69D6 | 3.02242265145 | 1 |\n", "| 279292bb36dbfc7f505e36ebf0... | SOUFAZA12AC3DFAB20 | 1.33684277534 | 2 |\n", "| 279292bb36dbfc7f505e36ebf0... | SOSFSTC12A8C141219 | 1.09198212624 | 3 |\n", "| 279292bb36dbfc7f505e36ebf0... | SOVIWFP12A58A7D1BD | 1.04516386986 | 4 |\n", "| 279292bb36dbfc7f505e36ebf0... | SOBMTQD12AB01833D0 | 1.02945168813 | 5 |\n", "| 279292bb36dbfc7f505e36ebf0... | SOCMNRG12AB0189D3F | 0.975643793742 | 6 |\n", "| 279292bb36dbfc7f505e36ebf0... | SOXOHUM12A67ADC826 | 0.950687328974 | 7 |\n", "| 279292bb36dbfc7f505e36ebf0... | SOWBFVW12A6D4F612B | 0.909237066905 | 8 |\n", "| 279292bb36dbfc7f505e36ebf0... | SOXFYTY127E9433E7D | 0.897727807363 | 9 |\n", "| 279292bb36dbfc7f505e36ebf0... 
| SOYBLYP12A58A79D32 | 0.897092819214 | 10 |\n", "+-------------------------------+--------------------+----------------+------+\n", "[10 rows x 4 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "recs = item_sim_model.recommend(users=users, k=K)\n", "recs.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Python [conda env:anaconda]", "language": "python", "name": "conda-env-anaconda-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" }, "latex_envs": { "LaTeX_envs_menu_present": true, "autoclose": false, "autocomplete": true, "bibliofile": "biblio.bib", "cite_by": "apalike", "current_citInitial": 1, "eqLabelWithNumbers": true, "eqNumInitial": 0, "hotkeys": { "equation": "Ctrl-E", "itemize": "Ctrl-I" }, "labels_anchors": false, "latex_user_defs": false, "report_style_numbering": false, "user_envs_cfg": false }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": false, "sideBar": false, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": { "height": "48px", "left": "930px", "top": "110.398px", "width": "159px" }, "toc_section_display": false, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 1 }