{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "recsys-evaluation-metrics-part-2.ipynb", "provenance": [], "collapsed_sections": [], "authorship_tag": "ABX9TyNAlixXGyEvZKsDppkgiLBX" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "bt279XzUARiB" }, "source": [ "# Recommender System Evaluations - Part 2\n", "> Understanding evaluation metrics and pricing factors\n", "\n", "- toc: true\n", "- badges: true\n", "- comments: true\n", "- categories: [Evaluation]\n", "- image:" ] }, { "cell_type": "code", "metadata": { "id": "_pWvCiiDdjr_" }, "source": [ "import numpy as np \n", "import pandas as pd \n", "import math" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "XXtU4GFqdn3G" }, "source": [ "## HR@K" ] }, { "cell_type": "code", "metadata": { "id": "MwcAl3w3dm-w" }, "source": [ "def hit_rate_at_k(recommended_list, bought_list, k=5): \n", " bought_list = np.array(bought_list) \n", " recommended_list = np.array(recommended_list)[:k]\n", " flags = np.isin(bought_list, recommended_list) \n", " return (flags.sum() > 0) * 1" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "xEck0b3EeWCz" }, "source": [ "recommended_list = [156, 1134, 27, 1543, 3345, 143, 32, 533, 11, 43] #items ids\n", "bought_list = [521, 32, 143, 991]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Mzw-ZKndeqrW", "outputId": "94a28725-61fb-4167-ebe2-54fd33919d4e" }, "source": [ "hit_rate_at_k(recommended_list, bought_list, 5)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0" ] }, "metadata": { "tags": [] }, "execution_count": 6 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "a5M85f-ffGhO", "outputId": "79b59344-0f3b-4000-960f-8480a029fe8d" }, "source": [ "hit_rate_at_k(recommended_list, bought_list, 10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "1" ] }, "metadata": { "tags": [] }, "execution_count": 9 } ] }, { "cell_type": "markdown", "metadata": { "id": "h_d4yXPCgS67" }, "source": [ "## Precision@K" ] }, { "cell_type": "markdown", "metadata": { "id": "A3QxPFvUgcHC" }, "source": [ "- Precision = (# of recommended items that are relevant) / (# of recommended items)\n", "- Precision @ k = (# of recommended items @k that are relevant) / (# of recommended items @k)\n", "- Money Precision @ k = (revenue of recommended items @k that are relevant) / (revenue of recommended items @k)" ] }, { "cell_type": "code", "metadata": { "id": "gtweDIeDgZWw" }, "source": [ "def precision_at_k(recommended_list, bought_list, k=5):\n", " bought_list = np.array(bought_list)\n", " recommended_list = np.array(recommended_list)[:k]\n", " \n", " flags = np.isin(bought_list, recommended_list)\n", " return flags.sum() / len(recommended_list)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "2KWbIvmsglul" }, "source": [ "def money_precision_at_k(recommended_list, bought_list, prices_recommended, k=5):\n", " recommend_list = np.array(recommended_list)[:k] \n", " prices_recommended = np.array(prices_recommended)[:k]\n", " flags = np.isin(recommend_list, bought_list)\n", " precision = np.dot(flags, prices_recommended) / prices_recommended.sum()\n", " return precision" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "w1IHDZ8dkZqk" }, "source": [ "recommended_list = [156, 1134, 27, 1543, 3345, 143, 32, 533, 11, 43] #items ids\n", "bought_list = [521, 32, 143, 991]\n", "prices_recommendede_list = [400, 60, 40, 90, 60, 340, 70, 190,110, 240]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ISqhoHgwksM6", "outputId": "2a6b2232-a4d9-4436-f038-b026c7bdd9d0" }, "source": [ "precision_at_k(recommended_list, bought_list, 5)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.0" ] }, "metadata": { "tags": [] }, "execution_count": 14 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6ukYqJRHkxGO", "outputId": "14f26a85-6e9e-4269-f3ee-5f154ffdb34c" }, "source": [ "precision_at_k(recommended_list, bought_list, 10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.2" ] }, "metadata": { "tags": [] }, "execution_count": 15 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "gPi_uXyElLqD", "outputId": "5cbeaee4-522e-4c34-e6f4-17b775348678" }, "source": [ "money_precision_at_k(recommended_list, bought_list, prices_recommendede_list, 5)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.0" ] }, "metadata": { "tags": [] }, "execution_count": 16 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ee_aWCJhlLmz", "outputId": "b2e73f90-577e-48ae-8d19-b22dfcd39bd3" }, "source": [ "money_precision_at_k(recommended_list, bought_list, prices_recommendede_list, 10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.25625" ] }, "metadata": { "tags": [] }, "execution_count": 17 } ] }, { "cell_type": "markdown", "metadata": { "id": "34bVpwgslYcp" }, "source": [ "## Recall@K" ] }, { "cell_type": "markdown", "metadata": { "id": "7MStCbMXlbiI" }, "source": [ "- Recall = (# of recommended items that are relevant) / (# of relevant items)\n", "- Recall @ k = (# of recommended items @k that are relevant) / (# of relevant items)\n", "- Money Recall @ k = (revenue of recommended items @k that are relevant) / (revenue of relevant items)" ] }, { "cell_type": "code", "metadata": { "id": "a8KPrGjllaWA" }, "source": [ "recommended_list=[143,156,1134,991,27,1543,3345,533,11,43] #itemsid\n", "prices_recommended_list=[400,60,40,90,60,340,70,190,110,240]\n", "\n", "bought_list=[521,32,143,991]\n", "prices_bought=[150,30,400,90]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Ni6hjugKlkkM" }, "source": [ "def recall_at_k(recommended_list, bought_list, k=5):\n", " bought_list = np.array(bought_list)\n", " recommended_list = np.array(recommended_list)[:k]\n", " \n", " flags = np.isin(bought_list, recommended_list)\n", " return flags.sum() / len(bought_list)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "ZlrS7hZPloF9" }, "source": [ "def money_recall_at_k(recommended_list, bought_list, prices_recommended, prices_bought, k=5):\n", " bought_list = np.array(bought_list)\n", " prices_bought = np.array(prices_bought)\n", " recommended_list = np.array(recommended_list)[:k]\n", " prices_recommended = np.array(prices_recommended)[:k]\n", "\n", " flags = np.isin(recommended_list, bought_list)\n", " return np.dot(flags, prices_recommended)/prices_bought.sum()" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "f5fepPoqmP2l", "outputId": "65d4ae56-af27-4e86-9bc6-935ddccec26a" }, "source": [ "recall_at_k(recommended_list, bought_list, 5)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.5" ] }, "metadata": { "tags": [] }, "execution_count": 22 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "eiVBQBvfmP2m", "outputId": "72056673-b92f-44c3-e8ee-d275cec7d247" }, "source": [ "recall_at_k(recommended_list, bought_list, 10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.5" ] }, "metadata": { "tags": [] }, "execution_count": 23 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "8eYgatL-mP2n", "outputId": "f02d8b2c-aa74-4c91-f7f7-67a8797ce183" }, "source": [ "money_recall_at_k(recommended_list, bought_list, prices_recommendede_list, 5)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "98.0" ] }, "metadata": { "tags": [] }, "execution_count": 24 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "y-x5T3HOmP2o", "outputId": "d294c3be-32ae-408e-c997-ae4ff6f22dfc" }, "source": [ "money_recall_at_k(recommended_list, bought_list, prices_recommendede_list, 10)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "49.0" ] }, "metadata": { "tags": [] }, "execution_count": 25 } ] }, { "cell_type": "markdown", "metadata": { "id": "FsR6hHRembdu" }, "source": [ "## MAP@K\n", "- MAP @ k (Mean Average Precision @ k )\n", "- Average AP @ k for all users" ] }, { "cell_type": "code", "metadata": { "id": "g1C1deyvmhba" }, "source": [ "def ap_k(recommended_list, bought_list, k=5):\n", " \n", " bought_list = np.array(bought_list)\n", " recommended_list = np.array(recommended_list)[:k]\n", " \n", " relevant_indexes = np.nonzero(np.isin(recommended_list, bought_list))[0]\n", " if len(relevant_indexes) == 0:\n", " return 0\n", " \n", " amount_relevant = len(relevant_indexes)\n", " \n", " sum_ = sum([precision_at_k(recommended_list, bought_list, k=index_relevant+1) for index_relevant in relevant_indexes])\n", " return sum_/amount_relevant" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "x6m00Jc3mjWF" }, "source": [ "def map_k(recommended_list, bought_list, k=5):\n", "\n", " amount_user = len(bought_list)\n", " list_ap_k = [ap_k(recommended_list[i], bought_list[i], k) for i in np.arange(amount_user)]\n", " \n", " sum_ap_k = sum(list_ap_k) \n", " return sum_ap_k/amount_user" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "x8GehNJDmpKA" }, "source": [ "#list of 3 users\n", "recommended_list_3_users = [[143,156,1134,991,27,1543,3345,533,11,43],\n", " [1134,533,14,4,15,1543,1,99,27,3345],\n", " [991,3345,27,533,43,143,1543,156,1134,11]]\n", "\n", "bought_list_3_users= [[521,32,143], #user1\n", " [143,156,991,43,11], #user2\n", " [1,2]] #user3" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "UnHTv6kvm0LX", "outputId": "0dd9f665-5a8e-4bbb-9580-8b04619df0fd" }, "source": [ "map_k(recommended_list_3_users, bought_list_3_users, 5)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.3333333333333333" ] }, "metadata": { "tags": [] }, "execution_count": 29 } ] }, { "cell_type": "markdown", "metadata": { "id": "OFt1EYJjm-6T" }, "source": [ "## MRR@K" ] }, { "cell_type": "code", "metadata": { "id": "JoOHG0rvnAUJ" }, "source": [ "def reciprocal_rank(recommended_list, bought_list, k=1):\n", " recommended_list = np.array(recommended_list)\n", " bought_list = np.array(bought_list)\n", " \n", " amount_user = len(bought_list)\n", " rr = []\n", " for i in np.arange(amount_user): \n", " relevant_indexes = np.nonzero(np.isin(recommended_list[i][:k], bought_list[i]))[0]\n", " if len(relevant_indexes) != 0:\n", " rr.append(1/(relevant_indexes[0]+1))\n", " \n", " if len(rr) == 0:\n", " return 0\n", " \n", " return sum(rr)/amount_user" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hVhEiJ3NnGX_", "outputId": "e6901d23-e342-47e5-96d1-db0c5b57a3af" }, "source": [ "reciprocal_rank(recommended_list_3_users, bought_list_3_users, 5)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:3: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray\n", " This is separate from the ipykernel package so we can avoid doing imports until\n" ], "name": "stderr" }, { "output_type": "execute_result", "data": { "text/plain": [ "0.3333333333333333" ] }, "metadata": { "tags": [] }, "execution_count": 33 } ] }, { "cell_type": "markdown", "metadata": { "id": "tlb3LmFunVEK" }, "source": [ "## NDCG@K" ] }, { "cell_type": "code", "metadata": { "id": "5blGg05wfSB8" }, "source": [ "def ndcg_at_k(recommended_list, bought_list, k=5):\n", " rec = recommended_list\n", " b = bought_list\n", " \n", " recommended_list = np.array(recommended_list)[:k]\n", " bought_list = np.array(bought_list)\n", " \n", " flags = np.isin(recommended_list, bought_list)\n", " rank_list = []\n", " for i in np.arange(len(recommended_list)):\n", " if i < 2:\n", " rank_list.append(i+1)\n", " else:\n", " rank_list.append(math.log2(i+1))\n", " if len(recommended_list) == 0:\n", " return 0\n", " dcg = sum(np.divide(flags, rank_list)) / len(recommended_list)\n", "\n", " i_dcg = sum(np.divide(1, rank_list)) / len(recommended_list)\n", "# print(i_dcg)\n", " return dcg/i_dcg" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "8XO4G7VvnXtd" }, "source": [ "recommended_list = [143,156,1134,991,27,1543,3345,533,11,43] #iditems\n", "prices_recommended_list = [400,60,40,90,60,340,70,190,110,240]\n", "\n", "bought_list = [521,32,143,991]\n", "prices_bought = [150,30,400,90]" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6fqcvbvMnfUC", "outputId": "a135ea43-30d6-48a4-af03-f4d68565f2dc" }, "source": [ "ndcg_at_k(recommended_list, bought_list, 5)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.489938890671454" ] }, "metadata": { "tags": [] }, "execution_count": 37 } ] } ] }