{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "\n", "# 使用Turicreate进行音乐推荐\n", "\n" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T07:57:48.013117Z", "start_time": "2020-06-13T05:56:30.069715Z" } }, "outputs": [], "source": [ "# %pip (rather than !pip) installs into the environment of the running kernel.\n", "# Pinned to 6.3, the release this notebook was last executed with.\n", "%pip install turicreate==6.3" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T08:01:38.863973Z", "start_time": "2020-06-13T08:01:33.832987Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "import os\n", "\n", "import turicreate as tc" ] },
{ "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## 下载数据\n", "\n", "http://s3.amazonaws.com/dato-datasets/millionsong/10000.txt\n" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T08:01:53.444519Z", "start_time": "2020-06-13T08:01:44.510194Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "# Header-less, tab-separated file; its three columns are read as X1, X2, X3 and renamed below.\n", "DATA_URL = 'http://s3.amazonaws.com/dato-datasets/millionsong/10000.txt'\n", "LOCAL_COPY = '/Users/datalab/bigdata/cjc/millionsong/song_usage_10000.txt'\n", "\n", "# Use the local copy when it is present; otherwise read straight from the public URL\n", "# so the notebook also runs on other machines.\n", "# NOTE(review): the URL is assumed to serve the same file as the local copy -- confirm.\n", "train_file = LOCAL_COPY if os.path.exists(LOCAL_COPY) else DATA_URL\n", "sf = tc.SFrame.read_csv(train_file, header=False, delimiter='\\t', verbose=False)\n", "sf = sf.rename({'X1':'user_id', 'X2':'music_id', 'X3':'rating'})" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T08:01:56.655114Z", "start_time": "2020-06-13T08:01:56.643344Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "# Fixed seed so the train/test partition is the same on every run.\n", "train_set, test_set = sf.random_split(0.8, seed=1)" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T08:02:05.296674Z", "start_time": "2020-06-13T08:01:57.677978Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
Preparing data set." ], "text/plain": [ "Preparing data set." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Data has 1599753 observations with 76085 users and 10000 items." ], "text/plain": [ " Data has 1599753 observations with 76085 users and 10000 items." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Data prepared in: 4.15079s" ], "text/plain": [ " Data prepared in: 4.15079s" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
1599753 observations to process; with 10000 unique items." ], "text/plain": [ "1599753 observations to process; with 10000 unique items." ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Popularity-based recommender fitted on the training split.\n", "popularity_model = tc.popularity_recommender.create(\n", "    train_set,\n", "    user_id='user_id',\n", "    item_id='music_id',\n", "    target='rating',\n", ")" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T08:02:17.217932Z", "start_time": "2020-06-13T08:02:08.102763Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
Preparing data set." ], "text/plain": [ "Preparing data set." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Data has 1599753 observations with 76085 users and 10000 items." ], "text/plain": [ " Data has 1599753 observations with 76085 users and 10000 items." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Data prepared in: 3.7942s" ], "text/plain": [ " Data prepared in: 3.7942s" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Training model from provided data." ], "text/plain": [ "Training model from provided data." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Gathering per-item and per-user statistics." ], "text/plain": [ "Gathering per-item and per-user statistics." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+------------+" ], "text/plain": [ "+--------------------------------+------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Elapsed Time (Item Statistics) | % Complete |" ], "text/plain": [ "| Elapsed Time (Item Statistics) | % Complete |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+------------+" ], "text/plain": [ "+--------------------------------+------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 7.569ms | 2.5 |" ], "text/plain": [ "| 7.569ms | 2.5 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 90.88ms | 100 |" ], "text/plain": [ "| 90.88ms | 100 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+------------+" ], "text/plain": [ "+--------------------------------+------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Setting up lookup tables." ], "text/plain": [ "Setting up lookup tables." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Processing data in one pass using dense lookup tables." ], "text/plain": [ "Processing data in one pass using dense lookup tables." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+-------------------------------------+------------------+-----------------+" ], "text/plain": [ "+-------------------------------------+------------------+-----------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |" ], "text/plain": [ "| Elapsed Time (Constructing Lookups) | Total % Complete | Items Processed |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+-------------------------------------+------------------+-----------------+" ], "text/plain": [ "+-------------------------------------+------------------+-----------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 605.016ms | 0 | 0 |" ], "text/plain": [ "| 605.016ms | 0 | 0 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 4.01s | 100 | 10000 |" ], "text/plain": [ "| 4.01s | 100 | 10000 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+-------------------------------------+------------------+-----------------+" ], "text/plain": [ "+-------------------------------------+------------------+-----------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Finalizing lookup tables." ], "text/plain": [ "Finalizing lookup tables." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Generating candidate set for working with new users." ], "text/plain": [ "Generating candidate set for working with new users." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Finished training in 5.31028s" ], "text/plain": [ "Finished training in 5.31028s" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Item-similarity recommender on the training split; cosine similarity is requested explicitly.\n", "item_sim_model = tc.item_similarity_recommender.create(\n", "    train_set,\n", "    user_id='user_id',\n", "    item_id='music_id',\n", "    target='rating',\n", "    similarity_type='cosine',\n", ")" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T08:03:05.251295Z", "start_time": "2020-06-13T08:02:23.686739Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
Preparing data set." ], "text/plain": [ "Preparing data set." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Data has 1599753 observations with 76085 users and 10000 items." ], "text/plain": [ " Data has 1599753 observations with 76085 users and 10000 items." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Data prepared in: 4.32575s" ], "text/plain": [ " Data prepared in: 4.32575s" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Training factorization_recommender for recommendations." ], "text/plain": [ "Training factorization_recommender for recommendations." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+--------------------------------------------------+----------+" ], "text/plain": [ "+--------------------------------+--------------------------------------------------+----------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Parameter | Description | Value |" ], "text/plain": [ "| Parameter | Description | Value |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+--------------------------------------------------+----------+" ], "text/plain": [ "+--------------------------------+--------------------------------------------------+----------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| num_factors | Factor Dimension | 8 |" ], "text/plain": [ "| num_factors | Factor Dimension | 8 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| regularization | L2 Regularization on Factors | 1e-08 |" ], "text/plain": [ "| regularization | L2 Regularization on Factors | 1e-08 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| solver | Solver used for training | sgd |" ], "text/plain": [ "| solver | Solver used for training | sgd |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| linear_regularization | L2 Regularization on Linear Coefficients | 1e-10 |" ], "text/plain": [ "| linear_regularization | L2 Regularization on Linear Coefficients | 1e-10 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| max_iterations | Maximum Number of Iterations | 50 |" ], "text/plain": [ "| max_iterations | Maximum Number of Iterations | 50 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+--------------------------------+--------------------------------------------------+----------+" ], "text/plain": [ "+--------------------------------+--------------------------------------------------+----------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Optimizing model using SGD; tuning step size." ], "text/plain": [ " Optimizing model using SGD; tuning step size." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Using 199969 / 1599753 points for tuning the step size." ], "text/plain": [ " Using 199969 / 1599753 points for tuning the step size." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Attempt | Initial Step Size | Estimated Objective Value |" ], "text/plain": [ "| Attempt | Initial Step Size | Estimated Objective Value |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 0 | 25 | No Decrease (230.933 >= 43.5401) |" ], "text/plain": [ "| 0 | 25 | No Decrease (230.933 >= 43.5401) |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 1 | 6.25 | No Decrease (219.447 >= 43.5401) |" ], "text/plain": [ "| 1 | 6.25 | No Decrease (219.447 >= 43.5401) |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 2 | 1.5625 | No Decrease (191.895 >= 43.5401) |" ], "text/plain": [ "| 2 | 1.5625 | No Decrease (191.895 >= 43.5401) |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 3 | 0.390625 | No Decrease (89.356 >= 43.5401) |" ], "text/plain": [ "| 3 | 0.390625 | No Decrease (89.356 >= 43.5401) |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 4 | 0.0976562 | 16.0024 |" ], "text/plain": [ "| 4 | 0.0976562 | 16.0024 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 5 | 0.0488281 | 11.4371 |" ], "text/plain": [ "| 5 | 0.0488281 | 11.4371 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 6 | 0.0244141 | 24.5498 |" ], "text/plain": [ "| 6 | 0.0244141 | 24.5498 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Final | 0.0488281 | 11.4371 |" ], "text/plain": [ "| Final | 0.0488281 | 11.4371 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+-------------------+------------------------------------------+" ], "text/plain": [ "+---------+-------------------+------------------------------------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Starting Optimization." ], "text/plain": [ "Starting Optimization." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Iter. | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size |" ], "text/plain": [ "| Iter. | Elapsed Time | Approx. Objective | Approx. Training RMSE | Step Size |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| Initial | 387us | 43.795 | 6.61778 | |" ], "text/plain": [ "| Initial | 387us | 43.795 | 6.61778 | |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 1 | 681.41ms | 43.5465 | 6.59858 | 0.0488281 |" ], "text/plain": [ "| 1 | 681.41ms | 43.5465 | 6.59858 | 0.0488281 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 2 | 1.25s | 40.8911 | 6.39426 | 0.0290334 |" ], "text/plain": [ "| 2 | 1.25s | 40.8911 | 6.39426 | 0.0290334 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 3 | 1.97s | 37.9926 | 6.16345 | 0.0214205 |" ], "text/plain": [ "| 3 | 1.97s | 37.9926 | 6.16345 | 0.0214205 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 4 | 2.74s | 35.4229 | 5.95132 | 0.0172633 |" ], "text/plain": [ "| 4 | 2.74s | 35.4229 | 5.95132 | 0.0172633 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 5 | 3.26s | 32.7792 | 5.72487 | 0.014603 |" ], "text/plain": [ "| 5 | 3.26s | 32.7792 | 5.72487 | 0.014603 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 10 | 5.91s | 24.5046 | 4.94956 | 0.008683 |" ], "text/plain": [ "| 10 | 5.91s | 24.5046 | 4.94956 | 0.008683 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 15 | 9.07s | 20.0943 | 4.48185 | 0.00640622 |" ], "text/plain": [ "| 15 | 9.07s | 20.0943 | 4.48185 | 0.00640622 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 20 | 11.83s | 17.639 | 4.19895 | 0.00516295 |" ], "text/plain": [ "| 20 | 11.83s | 17.639 | 4.19895 | 0.00516295 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 25 | 14.27s | 15.7055 | 3.96197 | 0.00436732 |" ], "text/plain": [ "| 25 | 14.27s | 15.7055 | 3.96197 | 0.00436732 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 30 | 16.57s | 14.3953 | 3.79299 | 0.00380916 |" ], "text/plain": [ "| 30 | 16.57s | 14.3953 | 3.79299 | 0.00380916 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 35 | 18.71s | 13.3639 | 3.65445 | 0.00339327 |" ], "text/plain": [ "| 35 | 18.71s | 13.3639 | 3.65445 | 0.00339327 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 40 | 20.92s | 12.5027 | 3.53463 | 0.00306991 |" ], "text/plain": [ "| 40 | 20.92s | 12.5027 | 3.53463 | 0.00306991 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 45 | 23.89s | 11.8108 | 3.43534 | 0.00281035 |" ], "text/plain": [ "| 45 | 23.89s | 11.8108 | 3.43534 | 0.00281035 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
| 50 | 26.34s | 9.85419 | 3.13763 | 0.00154408 |" ], "text/plain": [ "| 50 | 26.34s | 9.85419 | 3.13763 | 0.00154408 |" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
+---------+--------------+-------------------+-----------------------+-------------+" ], "text/plain": [ "+---------+--------------+-------------------+-----------------------+-------------+" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Optimization Complete: Maximum number of passes through the data reached." ], "text/plain": [ "Optimization Complete: Maximum number of passes through the data reached." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Computing final objective value and training RMSE." ], "text/plain": [ "Computing final objective value and training RMSE." ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Final objective value: 8.8282" ], "text/plain": [ " Final objective value: 8.8282" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Final training RMSE: 2.96963" ], "text/plain": [ " Final training RMSE: 2.96963" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Factorization recommender on the training split, reached through the same top-level\n", "# tc.<model>_recommender alias as the popularity and item-similarity models.\n", "factorization_machine_model = tc.factorization_recommender.create(\n", "    train_set,\n", "    user_id='user_id',\n", "    item_id='music_id',\n", "    target='rating',\n", ")" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T08:03:18.663826Z", "start_time": "2020-06-13T08:03:18.636917Z" } }, "outputs": [ { "data": { "text/plain": [ "1599753" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(train_set)" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2020-06-13T08:04:48.668769Z", "start_time": "2020-06-13T08:03:22.701004Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "compare_models: using 34354 users to estimate model performance\n", "PROGRESS: Evaluate model M0\n" ] }, { "data": { "text/html": [ "
recommendations finished on 1000/34354 queries. users per second: 5393.6" ], "text/plain": [ "recommendations finished on 1000/34354 queries. users per second: 5393.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 2000/34354 queries. users per second: 5901.05" ], "text/plain": [ "recommendations finished on 2000/34354 queries. users per second: 5901.05" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 3000/34354 queries. users per second: 5891.65" ], "text/plain": [ "recommendations finished on 3000/34354 queries. users per second: 5891.65" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 4000/34354 queries. users per second: 5752.93" ], "text/plain": [ "recommendations finished on 4000/34354 queries. users per second: 5752.93" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 5000/34354 queries. users per second: 5841.69" ], "text/plain": [ "recommendations finished on 5000/34354 queries. users per second: 5841.69" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 6000/34354 queries. users per second: 5762.33" ], "text/plain": [ "recommendations finished on 6000/34354 queries. users per second: 5762.33" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 7000/34354 queries. users per second: 5834.76" ], "text/plain": [ "recommendations finished on 7000/34354 queries. users per second: 5834.76" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 8000/34354 queries. users per second: 5904.72" ], "text/plain": [ "recommendations finished on 8000/34354 queries. users per second: 5904.72" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 9000/34354 queries. users per second: 5766.33" ], "text/plain": [ "recommendations finished on 9000/34354 queries. users per second: 5766.33" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 10000/34354 queries. users per second: 5748.05" ], "text/plain": [ "recommendations finished on 10000/34354 queries. users per second: 5748.05" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 11000/34354 queries. users per second: 5619.56" ], "text/plain": [ "recommendations finished on 11000/34354 queries. users per second: 5619.56" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 12000/34354 queries. users per second: 5600.83" ], "text/plain": [ "recommendations finished on 12000/34354 queries. users per second: 5600.83" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 13000/34354 queries. users per second: 5659.63" ], "text/plain": [ "recommendations finished on 13000/34354 queries. users per second: 5659.63" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 14000/34354 queries. users per second: 5537.91" ], "text/plain": [ "recommendations finished on 14000/34354 queries. users per second: 5537.91" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 15000/34354 queries. users per second: 5566.55" ], "text/plain": [ "recommendations finished on 15000/34354 queries. users per second: 5566.55" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 16000/34354 queries. users per second: 5566.55" ], "text/plain": [ "recommendations finished on 16000/34354 queries. users per second: 5566.55" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 17000/34354 queries. users per second: 5541.39" ], "text/plain": [ "recommendations finished on 17000/34354 queries. users per second: 5541.39" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 18000/34354 queries. users per second: 5537.43" ], "text/plain": [ "recommendations finished on 18000/34354 queries. users per second: 5537.43" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 19000/34354 queries. users per second: 5494.31" ], "text/plain": [ "recommendations finished on 19000/34354 queries. users per second: 5494.31" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 20000/34354 queries. users per second: 5540.8" ], "text/plain": [ "recommendations finished on 20000/34354 queries. users per second: 5540.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 21000/34354 queries. users per second: 5567.68" ], "text/plain": [ "recommendations finished on 21000/34354 queries. users per second: 5567.68" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 22000/34354 queries. users per second: 5596" ], "text/plain": [ "recommendations finished on 22000/34354 queries. users per second: 5596" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 23000/34354 queries. users per second: 5594.48" ], "text/plain": [ "recommendations finished on 23000/34354 queries. users per second: 5594.48" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 24000/34354 queries. users per second: 5551.09" ], "text/plain": [ "recommendations finished on 24000/34354 queries. users per second: 5551.09" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 25000/34354 queries. users per second: 5561.67" ], "text/plain": [ "recommendations finished on 25000/34354 queries. users per second: 5561.67" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 26000/34354 queries. users per second: 5526.95" ], "text/plain": [ "recommendations finished on 26000/34354 queries. users per second: 5526.95" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 27000/34354 queries. users per second: 5465.2" ], "text/plain": [ "recommendations finished on 27000/34354 queries. users per second: 5465.2" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 28000/34354 queries. users per second: 5437.18" ], "text/plain": [ "recommendations finished on 28000/34354 queries. users per second: 5437.18" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 29000/34354 queries. users per second: 5444.31" ], "text/plain": [ "recommendations finished on 29000/34354 queries. users per second: 5444.31" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 30000/34354 queries. users per second: 5452.98" ], "text/plain": [ "recommendations finished on 30000/34354 queries. users per second: 5452.98" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 31000/34354 queries. users per second: 5430.37" ], "text/plain": [ "recommendations finished on 31000/34354 queries. users per second: 5430.37" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 32000/34354 queries. users per second: 5407.57" ], "text/plain": [ "recommendations finished on 32000/34354 queries. users per second: 5407.57" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 33000/34354 queries. users per second: 5386.39" ], "text/plain": [ "recommendations finished on 33000/34354 queries. users per second: 5386.39" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 34000/34354 queries. users per second: 5405.75" ], "text/plain": [ "recommendations finished on 34000/34354 queries. users per second: 5405.75" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Precision and recall summary statistics by cutoff\n", "+--------+------------------------+------------------------+\n", "| cutoff | mean_precision | mean_recall |\n", "+--------+------------------------+------------------------+\n", "| 1 | 0.00040752168597543237 | 7.081875226074056e-05 |\n", "| 2 | 0.0004075216859754322 | 0.00011226035987334286 |\n", "| 3 | 0.0003104927131241391 | 0.00012739048212345128 |\n", "| 4 | 0.00034202712930080884 | 0.0002353030396709293 |\n", "| 5 | 0.00046573906968620603 | 0.00046444872494981606 |\n", "| 6 | 0.0004414818264733844 | 0.0005228618282305431 |\n", "| 7 | 0.00043247199328005 | 0.0005736297245772213 |\n", "| 8 | 0.0004075216859754325 | 0.0006087229198555468 |\n", "| 9 | 0.0004528018733060353 | 0.0007391431080220185 |\n", "| 10 | 0.00043080863945974164 | 0.0007735092654785608 |\n", "+--------+------------------------+------------------------+\n", "[10 rows x 3 columns]\n", "\n", "\n", "Overall RMSE: 5.9110406201585715\n", "\n", "Per User RMSE (best)\n", "+-------------------------------+------+-------+\n", "| user_id | rmse | count |\n", "+-------------------------------+------+-------+\n", "| cafbf96566378466408b7b3c76... | 0.0 | 1 |\n", "+-------------------------------+------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per User RMSE (worst)\n", "+-------------------------------+-------------------+-------+\n", "| user_id | rmse | count |\n", "+-------------------------------+-------------------+-------+\n", "| 38767872c514c1b43bab5c7b21... 
| 341.2071760874715 | 2 |\n", "+-------------------------------+-------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (best)\n", "+--------------------+---------------------+-------+\n", "| music_id | rmse | count |\n", "+--------------------+---------------------+-------+\n", "| SOXDPFW12A81C2319B | 0.07352941176470584 | 5 |\n", "+--------------------+---------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (worst)\n", "+--------------------+--------------------+-------+\n", "| music_id | rmse | count |\n", "+--------------------+--------------------+-------+\n", "| SOPKTFQ12A67021600 | 124.75180499529567 | 9 |\n", "+--------------------+--------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "PROGRESS: Evaluate model M1\n" ] }, { "data": { "text/html": [ "
recommendations finished on 1000/34354 queries. users per second: 5825.84" ], "text/plain": [ "recommendations finished on 1000/34354 queries. users per second: 5825.84" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 2000/34354 queries. users per second: 5301.82" ], "text/plain": [ "recommendations finished on 2000/34354 queries. users per second: 5301.82" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 3000/34354 queries. users per second: 5419.21" ], "text/plain": [ "recommendations finished on 3000/34354 queries. users per second: 5419.21" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 4000/34354 queries. users per second: 5627.83" ], "text/plain": [ "recommendations finished on 4000/34354 queries. users per second: 5627.83" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 5000/34354 queries. users per second: 5771.93" ], "text/plain": [ "recommendations finished on 5000/34354 queries. users per second: 5771.93" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 6000/34354 queries. users per second: 5585.3" ], "text/plain": [ "recommendations finished on 6000/34354 queries. users per second: 5585.3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 7000/34354 queries. users per second: 5365.4" ], "text/plain": [ "recommendations finished on 7000/34354 queries. users per second: 5365.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 8000/34354 queries. users per second: 5147.5" ], "text/plain": [ "recommendations finished on 8000/34354 queries. users per second: 5147.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 9000/34354 queries. users per second: 5252.85" ], "text/plain": [ "recommendations finished on 9000/34354 queries. users per second: 5252.85" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 10000/34354 queries. users per second: 5301.11" ], "text/plain": [ "recommendations finished on 10000/34354 queries. users per second: 5301.11" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 11000/34354 queries. users per second: 5257.75" ], "text/plain": [ "recommendations finished on 11000/34354 queries. users per second: 5257.75" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 12000/34354 queries. users per second: 5181.17" ], "text/plain": [ "recommendations finished on 12000/34354 queries. users per second: 5181.17" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 13000/34354 queries. users per second: 5139.25" ], "text/plain": [ "recommendations finished on 13000/34354 queries. users per second: 5139.25" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 14000/34354 queries. users per second: 5155.66" ], "text/plain": [ "recommendations finished on 14000/34354 queries. users per second: 5155.66" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 15000/34354 queries. users per second: 4967.57" ], "text/plain": [ "recommendations finished on 15000/34354 queries. users per second: 4967.57" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 16000/34354 queries. users per second: 4921.83" ], "text/plain": [ "recommendations finished on 16000/34354 queries. users per second: 4921.83" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 17000/34354 queries. users per second: 4990.4" ], "text/plain": [ "recommendations finished on 17000/34354 queries. users per second: 4990.4" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 18000/34354 queries. users per second: 5068.35" ], "text/plain": [ "recommendations finished on 18000/34354 queries. users per second: 5068.35" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 19000/34354 queries. users per second: 5140.77" ], "text/plain": [ "recommendations finished on 19000/34354 queries. users per second: 5140.77" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 20000/34354 queries. users per second: 5213.8" ], "text/plain": [ "recommendations finished on 20000/34354 queries. users per second: 5213.8" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 21000/34354 queries. users per second: 5184.08" ], "text/plain": [ "recommendations finished on 21000/34354 queries. users per second: 5184.08" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 22000/34354 queries. users per second: 5094.51" ], "text/plain": [ "recommendations finished on 22000/34354 queries. users per second: 5094.51" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 23000/34354 queries. users per second: 5146.65" ], "text/plain": [ "recommendations finished on 23000/34354 queries. users per second: 5146.65" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 24000/34354 queries. users per second: 5170.89" ], "text/plain": [ "recommendations finished on 24000/34354 queries. users per second: 5170.89" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 25000/34354 queries. users per second: 5200.77" ], "text/plain": [ "recommendations finished on 25000/34354 queries. users per second: 5200.77" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 26000/34354 queries. users per second: 5183.28" ], "text/plain": [ "recommendations finished on 26000/34354 queries. users per second: 5183.28" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 27000/34354 queries. users per second: 5222.37" ], "text/plain": [ "recommendations finished on 27000/34354 queries. users per second: 5222.37" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 28000/34354 queries. users per second: 5251.88" ], "text/plain": [ "recommendations finished on 28000/34354 queries. users per second: 5251.88" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 29000/34354 queries. users per second: 5272.72" ], "text/plain": [ "recommendations finished on 29000/34354 queries. users per second: 5272.72" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 30000/34354 queries. users per second: 5288.9" ], "text/plain": [ "recommendations finished on 30000/34354 queries. users per second: 5288.9" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 31000/34354 queries. users per second: 5280.69" ], "text/plain": [ "recommendations finished on 31000/34354 queries. users per second: 5280.69" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 32000/34354 queries. users per second: 5272.44" ], "text/plain": [ "recommendations finished on 32000/34354 queries. users per second: 5272.44" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 33000/34354 queries. users per second: 5320.29" ], "text/plain": [ "recommendations finished on 33000/34354 queries. users per second: 5320.29" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 34000/34354 queries. users per second: 5353.61" ], "text/plain": [ "recommendations finished on 34000/34354 queries. users per second: 5353.61" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Precision and recall summary statistics by cutoff\n", "+--------+----------------------+----------------------+\n", "| cutoff | mean_precision | mean_recall |\n", "+--------+----------------------+----------------------+\n", "| 1 | 0.12356639692612195 | 0.027394188372094497 |\n", "| 2 | 0.10617395354252777 | 0.0450683741242837 |\n", "| 3 | 0.09521453105897464 | 0.059418188595173727 |\n", "| 4 | 0.08673662455609242 | 0.07079052813704197 |\n", "| 5 | 0.08053792862548775 | 0.08078157252034639 |\n", "| 6 | 0.07570685606722138 | 0.09033226689608059 |\n", "| 7 | 0.07142441304402039 | 0.09836168110578075 |\n", "| 8 | 0.06780869767712655 | 0.10631301678527204 |\n", "| 9 | 0.06465687320900597 | 0.1134367927631828 |\n", "| 10 | 0.061861791931070574 | 0.11986145197856829 |\n", "+--------+----------------------+----------------------+\n", "[10 rows x 3 columns]\n", "\n", "\n", "Overall RMSE: 6.6935880472475\n", "\n", "Per User RMSE (best)\n", "+-------------------------------+--------------------+-------+\n", "| user_id | rmse | count |\n", "+-------------------------------+--------------------+-------+\n", "| f015c8ec1487d172a76e8af6fd... | 0.0991658329963685 | 1 |\n", "+-------------------------------+--------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per User RMSE (worst)\n", "+-------------------------------+-------------------+-------+\n", "| user_id | rmse | count |\n", "+-------------------------------+-------------------+-------+\n", "| 38767872c514c1b43bab5c7b21... 
| 346.8117700274172 | 2 |\n", "+-------------------------------+-------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (best)\n", "+--------------------+--------------------+-------+\n", "| music_id | rmse | count |\n", "+--------------------+--------------------+-------+\n", "| SOAVQRP12A8C13120B | 0.8293558937901314 | 4 |\n", "+--------------------+--------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (worst)\n", "+--------------------+-------------------+-------+\n", "| music_id | rmse | count |\n", "+--------------------+-------------------+-------+\n", "| SOPKTFQ12A67021600 | 128.8226138396836 | 9 |\n", "+--------------------+-------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "PROGRESS: Evaluate model M2\n" ] }, { "data": { "text/html": [ "
recommendations finished on 1000/34354 queries. users per second: 5658.25" ], "text/plain": [ "recommendations finished on 1000/34354 queries. users per second: 5658.25" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 2000/34354 queries. users per second: 6360.82" ], "text/plain": [ "recommendations finished on 2000/34354 queries. users per second: 6360.82" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 3000/34354 queries. users per second: 6111.59" ], "text/plain": [ "recommendations finished on 3000/34354 queries. users per second: 6111.59" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 4000/34354 queries. users per second: 5757.38" ], "text/plain": [ "recommendations finished on 4000/34354 queries. users per second: 5757.38" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 5000/34354 queries. users per second: 5756.7" ], "text/plain": [ "recommendations finished on 5000/34354 queries. users per second: 5756.7" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 6000/34354 queries. users per second: 5927.31" ], "text/plain": [ "recommendations finished on 6000/34354 queries. users per second: 5927.31" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 7000/34354 queries. users per second: 6043.27" ], "text/plain": [ "recommendations finished on 7000/34354 queries. users per second: 6043.27" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 8000/34354 queries. users per second: 6053.29" ], "text/plain": [ "recommendations finished on 8000/34354 queries. users per second: 6053.29" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 9000/34354 queries. users per second: 6008.53" ], "text/plain": [ "recommendations finished on 9000/34354 queries. users per second: 6008.53" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 10000/34354 queries. users per second: 6029.76" ], "text/plain": [ "recommendations finished on 10000/34354 queries. users per second: 6029.76" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 11000/34354 queries. users per second: 5973.13" ], "text/plain": [ "recommendations finished on 11000/34354 queries. users per second: 5973.13" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 12000/34354 queries. users per second: 5982.24" ], "text/plain": [ "recommendations finished on 12000/34354 queries. users per second: 5982.24" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 13000/34354 queries. users per second: 5945.1" ], "text/plain": [ "recommendations finished on 13000/34354 queries. users per second: 5945.1" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 14000/34354 queries. users per second: 5940.6" ], "text/plain": [ "recommendations finished on 14000/34354 queries. users per second: 5940.6" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 15000/34354 queries. users per second: 5976.12" ], "text/plain": [ "recommendations finished on 15000/34354 queries. users per second: 5976.12" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 16000/34354 queries. users per second: 5915.28" ], "text/plain": [ "recommendations finished on 16000/34354 queries. users per second: 5915.28" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 17000/34354 queries. users per second: 5818.41" ], "text/plain": [ "recommendations finished on 17000/34354 queries. users per second: 5818.41" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 18000/34354 queries. users per second: 5838.5" ], "text/plain": [ "recommendations finished on 18000/34354 queries. users per second: 5838.5" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 19000/34354 queries. users per second: 5876.56" ], "text/plain": [ "recommendations finished on 19000/34354 queries. users per second: 5876.56" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 20000/34354 queries. users per second: 5943.82" ], "text/plain": [ "recommendations finished on 20000/34354 queries. users per second: 5943.82" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 21000/34354 queries. users per second: 5952.95" ], "text/plain": [ "recommendations finished on 21000/34354 queries. users per second: 5952.95" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 22000/34354 queries. users per second: 5934.66" ], "text/plain": [ "recommendations finished on 22000/34354 queries. users per second: 5934.66" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 23000/34354 queries. users per second: 5937.77" ], "text/plain": [ "recommendations finished on 23000/34354 queries. users per second: 5937.77" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 24000/34354 queries. users per second: 5984.55" ], "text/plain": [ "recommendations finished on 24000/34354 queries. users per second: 5984.55" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 25000/34354 queries. users per second: 5983.3" ], "text/plain": [ "recommendations finished on 25000/34354 queries. users per second: 5983.3" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 26000/34354 queries. users per second: 5979.39" ], "text/plain": [ "recommendations finished on 26000/34354 queries. users per second: 5979.39" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 27000/34354 queries. users per second: 5971.13" ], "text/plain": [ "recommendations finished on 27000/34354 queries. users per second: 5971.13" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 28000/34354 queries. users per second: 6012.81" ], "text/plain": [ "recommendations finished on 28000/34354 queries. users per second: 6012.81" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 29000/34354 queries. users per second: 6001.44" ], "text/plain": [ "recommendations finished on 29000/34354 queries. users per second: 6001.44" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 30000/34354 queries. users per second: 6024.65" ], "text/plain": [ "recommendations finished on 30000/34354 queries. users per second: 6024.65" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 31000/34354 queries. users per second: 5997.94" ], "text/plain": [ "recommendations finished on 31000/34354 queries. users per second: 5997.94" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 32000/34354 queries. users per second: 5964.91" ], "text/plain": [ "recommendations finished on 32000/34354 queries. users per second: 5964.91" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 33000/34354 queries. users per second: 5996.43" ], "text/plain": [ "recommendations finished on 33000/34354 queries. users per second: 5996.43" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
recommendations finished on 34000/34354 queries. users per second: 6000.34" ], "text/plain": [ "recommendations finished on 34000/34354 queries. users per second: 6000.34" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Precision and recall summary statistics by cutoff\n", "+--------+-----------------------+------------------------+\n", "| cutoff | mean_precision | mean_recall |\n", "+--------+-----------------------+------------------------+\n", "| 1 | 0.000611282528963149 | 0.00015967112604116908 |\n", "| 2 | 0.0005094021074692894 | 0.00020969792246105614 |\n", "| 3 | 0.0005142535561118539 | 0.0002897975387832757 |\n", "| 4 | 0.0005312336263608328 | 0.0004549968288297451 |\n", "| 5 | 0.0005414216685102181 | 0.0005703895459784241 |\n", "| 6 | 0.000548213696609806 | 0.0007142050704628652 |\n", "| 7 | 0.0005738570680062202 | 0.0008314573503625282 |\n", "| 8 | 0.0005930895965535321 | 0.0009602957110903129 |\n", "| 9 | 0.0006015796316780176 | 0.0011215295194248998 |\n", "| 10 | 0.0005967281830354553 | 0.0012125256843862468 |\n", "+--------+-----------------------+------------------------+\n", "[10 rows x 3 columns]\n", "\n", "\n", "Overall RMSE: 7.641661679566184\n", "\n", "Per User RMSE (best)\n", "+-------------------------------+-----------------------+-------+\n", "| user_id | rmse | count |\n", "+-------------------------------+-----------------------+-------+\n", "| 220f26b368277020c4e685351a... | 5.398923054400484e-06 | 1 |\n", "+-------------------------------+-----------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per User RMSE (worst)\n", "+-------------------------------+-------------------+-------+\n", "| user_id | rmse | count |\n", "+-------------------------------+-------------------+-------+\n", "| 38767872c514c1b43bab5c7b21... 
| 360.3407134339816 | 2 |\n", "+-------------------------------+-------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (best)\n", "+--------------------+----------------------+-------+\n", "| music_id | rmse | count |\n", "+--------------------+----------------------+-------+\n", "| SOAESGK12A8C138488 | 0.028188730543956098 | 1 |\n", "+--------------------+----------------------+-------+\n", "[1 rows x 3 columns]\n", "\n", "\n", "Per Item RMSE (worst)\n", "+--------------------+--------------------+-------+\n", "| music_id | rmse | count |\n", "+--------------------+--------------------+-------+\n", "| SOPKTFQ12A67021600 | 126.00562214029682 | 9 |\n", "+--------------------+--------------------+-------+\n", "[1 rows x 3 columns]\n", "\n" ] } ], "source": [ "result = tc.recommender.util.compare_models(test_set, \n", " [popularity_model, item_sim_model, factorization_machine_model],\n", " user_sample=.5, skip_set=train_set)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:29:22.874873Z", "start_time": "2019-06-14T16:29:22.451971Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "# First (up to) 100 distinct user ids; K is presumably the top-K cutoff used by later cells (TODO confirm).\n", "K = 10\n", "users = tc.SArray(sf['user_id'].unique().head(100))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2019-06-14T16:29:24.466111Z", "start_time": "2019-06-14T16:29:24.410860Z" }, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
user_id | \n", "music_id | \n", "score | \n", "rank | \n", "
---|---|---|---|
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | \n",
" SOXUQNR12AF72A69D6 | \n", "3.022422651449839 | \n", "1 | \n", "
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | \n",
" SOUFAZA12AC3DFAB20 | \n", "1.3368427753448486 | \n", "2 | \n", "
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | \n",
" SOSFSTC12A8C141219 | \n", "1.091982126235962 | \n", "3 | \n", "
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | \n",
" SOVIWFP12A58A7D1BD | \n", "1.045163869857788 | \n", "4 | \n", "
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | \n",
" SOBMTQD12AB01833D0 | \n", "1.0294516881306965 | \n", "5 | \n", "
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | \n",
" SOCMNRG12AB0189D3F | \n", "0.9756437937418619 | \n", "6 | \n", "
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | \n",
" SOXOHUM12A67ADC826 | \n", "0.9506873289744059 | \n", "7 | \n", "
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | \n",
" SOWBFVW12A6D4F612B | \n", "0.9092370669047037 | \n", "8 | \n", "
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | \n",
" SOXFYTY127E9433E7D | \n", "0.8977278073628744 | \n", "9 | \n", "
279292bb36dbfc7f505e36ebf 038c81eb1d1d63e ... | \n",
" SOYBLYP12A58A79D32 | \n", "0.8970928192138672 | \n", "10 | \n", "