{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%config Completer.use_jedi = False" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "from optuna.exceptions import ExperimentalWarning\n", "warnings.filterwarnings(\"ignore\", category=UserWarning)\n", "warnings.filterwarnings(\"ignore\", category=ExperimentalWarning)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "from pyspark.sql.types import IntegerType\n", "from pyspark.sql.functions import array_contains, col, explode, split, substring\n", "\n", "from replay.data_preparator import DataPreparator\n", "from replay.experiment import Experiment\n", "from replay.metrics import HitRate, NDCG, MAP, Coverage\n", "from replay.models import LightFMWrap\n", "from replay.session_handler import State\n", "from replay.splitters import UserSplitter\n", "from rs_datasets import MovieLens" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "K=10\n", "SEED=1234" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# This notebook shows an example of using the LightFM model and preprocessing a dataset with RePlay, including:\n", "1. Data loading\n", "2. Features preprocessing with pyspark\n", "3. Building LightFM model based on interaction matrix and features\n", "4. Model evaluation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 1) Data loading" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will use the MovieLens 10M dataset from the rs_datasets package, which provides a collection of recommendation datasets."
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "ratings\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
" ], "text/plain": [ " user_id item_id rating timestamp\n", "0 1 122 5.0 838985046\n", "1 1 185 5.0 838983525\n", "2 1 231 5.0 838983392" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "items\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01Toy Story (1995)Adventure|Animation|Children|Comedy|Fantasy
12Jumanji (1995)Adventure|Children|Fantasy
23Grumpier Old Men (1995)Comedy|Romance
\n", "
" ], "text/plain": [ " item_id title \\\n", "0 1 Toy Story (1995) \n", "1 2 Jumanji (1995) \n", "2 3 Grumpier Old Men (1995) \n", "\n", " genres \n", "0 Adventure|Animation|Children|Comedy|Fantasy \n", "1 Adventure|Children|Fantasy \n", "2 Comedy|Romance " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "tags\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
" ], "text/plain": [ " user_id item_id tag timestamp\n", "0 15 4973 excellent! 1215184630\n", "1 20 1747 politics 1188263867\n", "2 20 1747 satire 1188263867" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "data = MovieLens(\"10m\")\n", "data.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Convert interaction log to RePlay format" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING: An illegal reflective access operation has occurred\n", "WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/home/u19893556/miniconda3/envs/replay/lib/python3.7/site-packages/pyspark/jars/spark-unsafe_2.12-3.1.2.jar) to constructor java.nio.DirectByteBuffer(long,int)\n", "WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform\n", "WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations\n", "WARNING: All illegal access operations will be denied in a future release\n", "22/02/27 22:14:17 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n", "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n", "Setting default log level to \"WARN\".\n", "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", "22/02/27 22:14:17 WARN SparkConf: Note that spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS in YARN).\n", "22/02/27 22:14:18 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.\n", "22/02/27 22:14:18 WARN Utils: Service 'SparkUI' could not bind on port 4041. 
Attempting port 4042.\n", "22/02/27 22:14:23 WARN TaskSetManager: Stage 0 contains a task of very large size (4073 KiB). The maximum recommended task size is 1000 KiB.\n", "22/02/27 22:14:27 WARN TaskSetManager: Stage 2 contains a task of very large size (4073 KiB). The maximum recommended task size is 1000 KiB.\n", "22/02/27 22:14:30 WARN TaskSetManager: Stage 4 contains a task of very large size (4073 KiB). The maximum recommended task size is 1000 KiB.\n", "22/02/27 22:14:38 WARN TaskSetManager: Stage 6 contains a task of very large size (4073 KiB). The maximum recommended task size is 1000 KiB.\n", " \r" ] } ], "source": [ "preparator = DataPreparator()\n", "log, _, item_features = preparator(data.ratings, item_features=data.items, mapping={\"relevance\": \"rating\"})" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Data split" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "user_random_splitter = UserSplitter(\n", " item_test_size=K,\n", " user_test_size=500,\n", " drop_cold_items=True,\n", " drop_cold_users=True,\n", " shuffle=True,\n", " seed=SEED\n", ")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "22/02/27 22:14:47 WARN DAGScheduler: Broadcasting large task binary with size 2004.4 KiB\n", "22/02/27 22:14:47 WARN TaskSetManager: Stage 10 contains a task of very large size (4073 KiB). The maximum recommended task size is 1000 KiB.\n", "22/02/27 22:14:49 WARN DAGScheduler: Broadcasting large task binary with size 2011.1 KiB\n", "22/02/27 22:14:50 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.\n", "22/02/27 22:14:50 WARN WindowExec: No Partition Defined for Window operation! 
Moving all data to a single partition, this can cause serious performance degradation.\n", "22/02/27 22:14:51 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB\n", "22/02/27 22:14:51 WARN TaskSetManager: Stage 15 contains a task of very large size (4073 KiB). The maximum recommended task size is 1000 KiB.\n", "22/02/27 22:14:51 WARN DAGScheduler: Broadcasting large task binary with size 2004.7 KiB\n", "22/02/27 22:14:52 WARN TaskSetManager: Stage 13 contains a task of very large size (4073 KiB). The maximum recommended task size is 1000 KiB.\n", "22/02/27 22:14:55 WARN DAGScheduler: Broadcasting large task binary with size 2008.8 KiB\n", "22/02/27 22:14:56 WARN DAGScheduler: Broadcasting large task binary with size 2013.9 KiB\n", "22/02/27 22:14:56 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:01 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:01 WARN DAGScheduler: Broadcasting large task binary with size 2004.8 KiB\n", "22/02/27 22:15:01 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB\n", "22/02/27 22:15:02 WARN TaskSetManager: Stage 19 contains a task of very large size (4073 KiB). The maximum recommended task size is 1000 KiB.\n", "22/02/27 22:15:03 WARN TaskSetManager: Stage 22 contains a task of very large size (4073 KiB). 
The maximum recommended task size is 1000 KiB.\n", "22/02/27 22:15:05 WARN DAGScheduler: Broadcasting large task binary with size 2009.1 KiB\n", "22/02/27 22:15:08 WARN DAGScheduler: Broadcasting large task binary with size 2014.1 KiB\n", "22/02/27 22:15:10 WARN DAGScheduler: Broadcasting large task binary with size 2.2 MiB\n", "22/02/27 22:15:15 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:17 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", " \r" ] }, { "data": { "text/plain": [ "(9995054, 5000)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train, test = user_random_splitter.split(log)\n", "train.count(), test.count()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "22/02/27 22:15:18 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:19 WARN WindowExec: No Partition Defined for Window operation! Moving all data to a single partition, this can cause serious performance degradation.\n", "22/02/27 22:15:19 WARN WindowExec: No Partition Defined for Window operation! 
Moving all data to a single partition, this can cause serious performance degradation.\n", "22/02/27 22:15:20 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:21 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:21 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:26 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:26 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:27 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:34 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:39 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:41 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", " \r" ] }, { "data": { "text/plain": [ "(9990054, 5000)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_opt, val_opt = user_random_splitter.split(train)\n", "train_opt.count(), val_opt.count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 2) Features preprocessing with pyspark" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Year" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------+----+\n", "|item_idx|year|\n", "+--------+----+\n", "| 11|1995|\n", "| 117|1995|\n", "+--------+----+\n", "only showing top 2 rows\n", "\n" ] } ], "source": [ "year = item_features.withColumn('year', substring(col('title'), -5, 4).astype(IntegerType())).select('item_idx', 'year')\n", "year.show(2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Genres" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "genres = (\n", " 
# Take genres from the DataPreparator output so item_idx is the same key used by year and log;\n", " # renaming the raw item_id to item_idx made the later inner join drop items (10681 -> 8316).\n", " item_features\n", " .select(\n", " \"item_idx\",\n", " # raw string: the pattern is a regex and the pipe must be escaped\n", " split(\"genres\", r\"\\|\").alias(\"genres\")\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "+--------+--------------------+\n", "|item_idx| genres|\n", "+--------+--------------------+\n", "| 1|[Adventure, Anima...|\n", "| 2|[Adventure, Child...|\n", "| 3| [Comedy, Romance]|\n", "| 4|[Comedy, Drama, R...|\n", "| 5| [Comedy]|\n", "| 6|[Action, Crime, T...|\n", "| 7| [Comedy, Romance]|\n", "| 8|[Adventure, Child...|\n", "| 9| [Action]|\n", "| 10|[Action, Adventur...|\n", "| 11|[Comedy, Drama, R...|\n", "| 12| [Comedy, Horror]|\n", "| 13|[Animation, Child...|\n", "| 14| [Drama]|\n", "| 15|[Action, Adventur...|\n", "| 16| [Crime, Drama]|\n", "| 17|[Comedy, Drama, R...|\n", "| 18|[Comedy, Drama, T...|\n", "| 19| [Comedy]|\n", "| 20|[Action, Comedy, ...|\n", "+--------+--------------------+\n", "only showing top 20 rows\n", "\n" ] } ], "source": [ "genres.show()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "genres_list = (\n", " genres.select(explode(\"genres\").alias(\"genre\"))\n", " .distinct().filter('genre <> \"(no genres listed)\"')\n", " .toPandas()[\"genre\"].tolist()\n", ")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Documentary',\n", " 'IMAX',\n", " 'Adventure',\n", " 'Animation',\n", " 'Comedy',\n", " 'Thriller',\n", " 'Sci-Fi',\n", " 'Musical',\n", " 'Horror',\n", " 'Action',\n", " 'Fantasy',\n", " 'War',\n", " 'Mystery',\n", " 'Drama',\n", " 'Film-Noir',\n", " 'Crime',\n", " 'Western',\n", " 'Romance',\n", " 'Children']" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "genres_list" ] }, { "cell_type": "code", "execution_count": 16, "metadata": 
{}, "outputs": [ { "data": { "text/plain": [ "10681" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "item_features = genres\n", "for genre in genres_list:\n", " item_features = item_features.withColumn(\n", " genre,\n", " array_contains(col(\"genres\"), genre).astype(IntegerType())\n", " )\n", "item_features = item_features.drop(\"genres\").cache()\n", "item_features.count()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8316" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "item_features = item_features.join(year, on='item_idx', how='inner')\n", "item_features.cache()\n", "item_features.count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 3) Building LightFM model based on interaction matrix and features" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "model_feat = LightFMWrap(random_state=SEED, loss='warp', no_components=128)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "22/02/27 22:15:44 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:45 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:49 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:49 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:50 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:50 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:52 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:54 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:55 WARN DAGScheduler: Broadcasting large task binary with 
size 2.3 MiB\n", "22/02/27 22:15:57 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:15:58 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:16:00 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:16:00 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:16:02 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:16:03 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:16:10 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", " \r" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 13h 4min 1s, sys: 53.2 s, total: 13h 4min 54s\n", "Wall time: 17min 29s\n" ] } ], "source": [ "%%time\n", "model_feat.fit(train, item_features=item_features)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "27-Feb-22 22:33:13, replay, WARNING: This model can't predict cold users, they will be ignored\n", "27-Feb-22 22:33:13, replay, WARNING: This model can't predict cold users, they will be ignored\n", "22/02/27 22:33:14 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:14 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:14 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:15 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:16 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:17 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:17 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:17 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:19 WARN DAGScheduler: 
Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:20 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:20 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:20 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:25 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:26 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:26 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:27 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:28 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:29 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:29 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:30 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:33 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:35 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:35 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:35 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:36 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 205 ms, sys: 247 ms, total: 452 ms\n", "Wall time: 23.1 s\n" ] } ], "source": [ "%%time\n", "recs = model_feat.predict(\n", " k=K,\n", " users=test.select('user_idx').distinct(),\n", " log=train,\n", " filter_seen_items=True,\n", " item_features=item_features\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 4) Model evaluation" ] }, { "cell_type": "code", "execution_count": 21, "metadata": 
{ "ExecuteTime": { "end_time": "2020-02-10T16:07:28.942205Z", "start_time": "2020-02-10T16:07:26.281475Z" }, "jupyter": { "outputs_hidden": false } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "22/02/27 22:33:37 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:39 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", " \r" ] } ], "source": [ "metrics = Experiment(test, {NDCG(): K,\n", " MAP() : K,\n", " HitRate(): [1, K],\n", " Coverage(train): K})\n", " " ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "22/02/27 22:33:40 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:40 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:40 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:40 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:42 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:42 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:42 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:43 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:47 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:55 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:57 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:57 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:58 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:33:59 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:59 WARN DAGScheduler: 
Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:59 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:33:59 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:34:01 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:01 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:01 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:03 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:06 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:07 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:08 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:08 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:34:08 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:34:08 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:34:09 WARN DAGScheduler: Broadcasting large task binary with size 2.3 MiB\n", "22/02/27 22:34:10 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:10 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:11 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:13 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:15 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:16 WARN DAGScheduler: Broadcasting large task binary with size 2.4 MiB\n", "22/02/27 22:34:19 WARN DAGScheduler: Broadcasting large task binary with size 2.5 MiB\n", "22/02/27 22:34:20 WARN DAGScheduler: Broadcasting large task binary with size 2.5 MiB\n", "22/02/27 22:34:21 WARN 
DAGScheduler: Broadcasting large task binary with size 2.5 MiB\n", "22/02/27 22:34:21 WARN DAGScheduler: Broadcasting large task binary with size 2.5 MiB\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
" ], "text/plain": [ " Coverage@10 HitRate@1 HitRate@10 MAP@10 NDCG@10\n", "LightFM_item_features 0.066404 0.348 0.784 0.121321 0.229198" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metrics.add_result(\"LightFM_item_features\", recs)\n", "metrics.results" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.11" } }, "nbformat": 4, "nbformat_minor": 4 }