{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "mzsjxPxPuRYX" }, "source": [ "# Contextual bandit with changing context\n", "> Customizing the context and changing it midway to see how fast the agent can adapt to the new context and start recommending better products as per the context\n", "\n", "- toc: true\n", "- badges: true\n", "- comments: true\n", "- categories: [contextual bandit]\n", "- image: " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "7SCDe05G1QSx", "outputId": "aa23d3f1-5df9-43df-e742-4710a370c32f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[K |████████████████████████████████| 2.6MB 7.0MB/s \n", "\u001b[?25h" ] } ], "source": [ "!pip install -q vowpalwabbit" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "U17IKmm51Sa_" }, "outputs": [], "source": [ "from vowpalwabbit import pyvw\n", "import random\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "from itertools import product" ] }, { "cell_type": "markdown", "metadata": { "id": "Mh7X3wHlrvk7" }, "source": [ "### Setting the context" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "AFdEwthO1U6N" }, "outputs": [], "source": [ "USER_LIKED_ARTICLE = -1.0\n", "USER_DISLIKED_ARTICLE = 0.0" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Joz5em0y7kfi" }, "outputs": [], "source": [ "users = ['A','B','C']\n", "items = ['Item1','Item2','Item3','Item4','Item5','Item6']\n", "context1 = ['morning','evening']\n", "context2 = ['summer','winter']\n", "\n", "context = pd.DataFrame(list(product(users, context1, context2, items)), columns=['users', 'context1', 'context2', 'items'])\n", "context['reward'] = 0\n", "\n", "#user 1 likes Item 1 in morning, and Item 6 in summer\n", "context.loc[(context.users=='A') & \\\n", " (context.context1=='morning') & \\\n", " (context['items']=='Item1'), \\\n", " 'reward'] = 1\n", "context.loc[(context.users=='A') & \\\n", " (context.context2=='summer') & \\\n", " (context['items']=='Item6'), \\\n", " 'reward'] = 1\n", "\n", "#user 2 likes Item 2 in winter, and Item 5 in summer morning\n", "context.loc[(context.users=='B') & \\\n", " (context.context2=='winter') & \\\n", " (context['items']=='Item2'), \\\n", " 'reward'] = 1\n", "context.loc[(context.users=='B') & \\\n", " (context.context1=='morning') & \\\n", " (context.context2=='summer') & \\\n", " (context['items']=='Item5'), \\\n", " 'reward'] = 1\n", "\n", "\n", "#user 3 likes Item 2 in morning, Item 3 in evening, and item 4 in winter morning\n", "context.loc[(context.users=='C') & \\\n", " (context.context1=='morning') & \\\n", " (context['items']=='Item2'), \\\n", " 'reward'] = 1\n", "context.loc[(context.users=='C') & \\\n", " (context.context1=='evening') & \\\n", " (context['items']=='Item3'), \\\n", " 'reward'] = 1\n", "context.loc[(context.users=='C') & \\\n", " (context.context1=='morning') & \\\n", " (context.context2=='winter') & \\\n", " (context['items']=='Item4'), \\\n", " 'reward'] = 1\n", "\n", "context['cost'] = context['reward']*-1\n", "\n", "contextdf = context.copy()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "rpgSzAFjv_Rh", "outputId": "7398f2a8-d5c0-4281-e98d-7c08b6363373" }, "outputs": [ { "data": { "text/plain": [ " 0 60\n", "-1 12\n", "Name: cost, dtype: int64" ] }, "execution_count": 5, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "contextdf.cost.value_counts()" ] }, { "cell_type": "markdown", "metadata": { "id": "_nTz89_drrWM" }, "source": [ "### Cost function util" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "QlOd83hd1Z3F" }, "outputs": [], "source": [ "def get_cost(context,action):\n", " return contextdf.loc[(contextdf['users']==context['user']) & \\\n", " (contextdf.context1==context['context1']) & \\\n", " (contextdf.context2==context['context2']) & \\\n", " (contextdf['items']==action), \\\n", " 'cost'].values[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "6P_HYhhW8UWO", "outputId": "06eb7e38-ede8-4013-8767-8e230092bb1a" }, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 7, "metadata": { "tags": [] }, "output_type": "execute_result" } ], "source": [ "get_cost({'user':'A','context1':'morning','context2':'summer'},'Item2')" ] }, { "cell_type": "markdown", "metadata": { "id": "wws6kC3ur2R0" }, "source": [ "### Vowpalwabbit format util" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "pTCHTNFf3jRe" }, "outputs": [], "source": [ "# This function modifies (context, action, cost, probability) to VW friendly format\n", "def to_vw_example_format(context, actions, cb_label = None):\n", " if cb_label is not None:\n", " chosen_action, cost, prob = cb_label\n", " example_string = \"\"\n", " example_string += \"shared |User users={} context1={} context2={}\\n\".format(context[\"user\"], context[\"context1\"], context[\"context2\"])\n", " for action in actions:\n", " if cb_label is not None and action == chosen_action:\n", " example_string += \"0:{}:{} \".format(cost, prob)\n", " example_string += \"|Action items={} \\n\".format(action)\n", " #Strip the last newline\n", " return example_string[:-1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "k8dLifwF3eDJ", "outputId": "29547726-b54f-42bf-cfbd-190195a43243" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "shared |User users=A context1=morning context2=summer\n", "|Action items=Item1 \n", "|Action items=Item2 \n", "|Action items=Item3 \n", "|Action items=Item4 \n", "|Action items=Item5 \n", "|Action items=Item6 \n" ] } ], "source": [ "context = {\"user\":\"A\",\"context1\":\"morning\",\"context2\":\"summer\"}\n", "\n", "print(to_vw_example_format(context,items))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "dM_aaYyb5xe9" }, "outputs": [], "source": [ "def sample_custom_pmf(pmf):\n", " total = sum(pmf)\n", " scale = 1 / total\n", " pmf = [x * scale for x in pmf]\n", " draw = random.random()\n", " sum_prob = 0.0\n", " for index, prob in enumerate(pmf):\n", " sum_prob += prob\n", " if(sum_prob > draw):\n", " return index, prob" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1xeAG23o2EGj" }, "outputs": [], "source": [ "def get_action(vw, context, actions):\n", " vw_text_example = to_vw_example_format(context,actions)\n", " pmf = vw.predict(vw_text_example)\n", " chosen_action_index, prob = sample_custom_pmf(pmf)\n", " return actions[chosen_action_index], prob" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "az_BHiJ32EwX" }, "outputs": [], "source": [ "def choose_user(users):\n", " return random.choice(users)\n", "\n", "def choose_context1(context1):\n", " return random.choice(context1)\n", "\n", "def choose_context2(context2):\n", " return random.choice(context2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "XqWD3pqt2GTr" }, "outputs": [], "source": [ "def run_simulation(vw, num_iterations, users, contexts1, contexts2, actions, cost_function, do_learn = True):\n", " cost_sum = 0.\n", " ctr = []\n", "\n", " for i in range(1, num_iterations+1):\n", " user = choose_user(users)\n", " context1 = choose_context1(contexts1)\n", " context2 = choose_context2(contexts2)\n", "\n", " context = {'user': user, 'context1': context1, 'context2': context2}\n", " # print(context)\n", " action, prob = get_action(vw, context, actions)\n", " # print(action, prob)\n", "\n", " cost = cost_function(context, action)\n", " # print(cost)\n", " cost_sum += cost\n", "\n", " if do_learn:\n", " # 5. Inform VW of what happened so we can learn from it\n", " vw_format = vw.parse(to_vw_example_format(context, actions, (action, cost, prob)),pyvw.vw.lContextualBandit)\n", " # 6. Learn\n", " vw.learn(vw_format)\n", " # 7. Let VW know you're done with these objects\n", " vw.finish_example(vw_format)\n", "\n", " # We negate this so that on the plot instead of minimizing cost, we are maximizing reward\n", " ctr.append(-1*cost_sum/i)\n", "\n", " return ctr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "fPDQ5H512KEj" }, "outputs": [], "source": [ "def plot_ctr(num_iterations, ctr):\n", " plt.plot(range(1,num_iterations+1), ctr)\n", " plt.xlabel('num_iterations', fontsize=14)\n", " plt.ylabel('ctr', fontsize=14)\n", " plt.ylim([0,1])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 287 }, "id": "wPhRpLyo2MmF", "outputId": "51f7cefd-986b-4366-c767-f9b6938a432c" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light", "tags": [] }, "output_type": "display_data" } ], "source": [ "# Instantiate learner in VW\n", "vw = pyvw.vw(\"--cb_explore_adf -q UA --quiet --epsilon 0.2\")\n", "\n", "num_iterations = 5000\n", "ctr = run_simulation(vw, num_iterations, users, context1, context2, items, get_cost)\n", "\n", "plot_ctr(num_iterations, ctr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 287 }, "id": "TL81VOqK2Tbq", "outputId": "acae0673-e691-419d-e766-604b1a9807ef" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light", "tags": [] }, "output_type": "display_data" } ], "source": [ "# Instantiate learner in VW but without -q\n", "vw = pyvw.vw(\"--cb_explore_adf --quiet --epsilon 0.2\")\n", "\n", "num_iterations = 5000\n", "ctr = run_simulation(vw, num_iterations, users, context1, context2, items, get_cost)\n", "\n", "plot_ctr(num_iterations, ctr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 287 }, "id": "2vU5Kozv2WDg", "outputId": "a77bdbf6-e776-4dc5-bc9d-3daaa335156f" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light", "tags": [] }, "output_type": "display_data" } ], "source": [ "# Instantiate learner in VW\n", "vw = pyvw.vw(\"--cb_explore_adf -q UA --quiet --epsilon 0.2\")\n", "\n", "num_iterations = 5000\n", "ctr = run_simulation(vw, num_iterations, users, context1, context2, items, get_cost, do_learn=False)\n", "\n", "plot_ctr(num_iterations, ctr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "UzavSLvC2ZHm" }, "outputs": [], "source": [ "users = ['A','B','C']\n", "items = ['Item1','Item2','Item3','Item4','Item5','Item6']\n", "context1 = ['morning','evening']\n", "context2 = ['summer','winter']\n", "\n", "context = pd.DataFrame(list(product(users, context1, context2, items)), columns=['users', 'context1', 'context2', 'items'])\n", "context['reward'] = 0\n", "\n", "#user 1 likes Item 2 in morning, and Item 5 in summer\n", "context.loc[(context.users=='A') & \\\n", " (context.context1=='morning') & \\\n", " (context['items']=='Item2'), \\\n", " 'reward'] = 1\n", "context.loc[(context.users=='A') & \\\n", " (context.context2=='summer') & \\\n", " (context['items']=='Item5'), \\\n", " 'reward'] = 1\n", "\n", "#user 2 likes Item 2 in summer, and Item 5 in morning\n", "context.loc[(context.users=='B') & \\\n", " (context.context2=='summer') & \\\n", " (context['items']=='Item2'), \\\n", " 'reward'] = 1\n", "context.loc[(context.users=='B') & \\\n", " (context.context1=='morning') & \\\n", " (context['items']=='Item5'), \\\n", " 'reward'] = 1\n", "\n", "\n", "#user 3 likes Item 4 in morning, Item 3 in evening, and item 4 in winter evening\n", "context.loc[(context.users=='C') & \\\n", " (context.context1=='morning') & \\\n", " (context['items']=='Item4'), \\\n", " 'reward'] = 1\n", "context.loc[(context.users=='C') & \\\n", " (context.context1=='evening') & \\\n", " (context['items']=='Item3'), \\\n", " 'reward'] = 1\n", "context.loc[(context.users=='C') & \\\n", " (context.context1=='evening') & \\\n", " (context.context2=='winter') & \\\n", " (context['items']=='Item4'), \\\n", " 'reward'] = 1\n", "\n", "context['cost'] = context['reward']*-1\n", "\n", "contextdf_new = context.copy()\n", "\n", "def get_cost_new1(context,action):\n", " return contextdf_new.loc[(contextdf_new['users']==context['user']) & \\\n", " (contextdf_new.context1==context['context1']) & \\\n", " (contextdf_new.context2==context['context2']) & \\\n", " (contextdf_new['items']==action), \\\n", " 'cost'].values[0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Mqvn5gLc2ik2" }, "outputs": [], "source": [ "def run_simulation_multiple_cost_functions(vw, num_iterations, users, contexts1, contexts2, actions, cost_functions, do_learn = True):\n", " cost_sum = 0.\n", " ctr = []\n", "\n", " start_counter = 1\n", " end_counter = start_counter + num_iterations\n", " for cost_function in cost_functions:\n", " for i in range(start_counter, end_counter):\n", " user = choose_user(users)\n", " context1 = choose_context1(contexts1)\n", " context2 = choose_context2(contexts2)\n", "\n", " context = {'user': user, 'context1': context1, 'context2': context2}\n", " \n", " action, prob = get_action(vw, context, actions)\n", " cost = cost_function(context, action)\n", " cost_sum += cost\n", "\n", " if do_learn:\n", " vw_format = vw.parse(to_vw_example_format(context, actions, (action, cost, prob)),pyvw.vw.lContextualBandit)\n", " vw.learn(vw_format)\n", "\n", " ctr.append(-1*cost_sum/i)\n", " start_counter = end_counter\n", " end_counter = start_counter + num_iterations\n", "\n", " return ctr" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 287 }, "id": "ExchBfTr2lBS", "outputId": "3c23545d-652c-4deb-971c-8da1b409fbd9" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light", "tags": [] }, "output_type": "display_data" } ], "source": [ "# use first reward function initially and then switch to second reward function\n", "\n", "# Instantiate learner in VW\n", "vw = pyvw.vw(\"--cb_explore_adf -q UA --quiet --epsilon 0.2\")\n", "\n", "num_iterations_per_cost_func = 5000\n", "cost_functions = [get_cost, get_cost_new1]\n", "total_iterations = num_iterations_per_cost_func * len(cost_functions)\n", "\n", "ctr = run_simulation_multiple_cost_functions(vw, num_iterations_per_cost_func, users, context1, context2, items, cost_functions)\n", "\n", "plot_ctr(total_iterations, ctr)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 287 }, "id": "mgaEviR42mlI", "outputId": "00ccc48f-f222-4a58-fe14-d8675d4b13cc" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light", "tags": [] }, "output_type": "display_data" } ], "source": [ "# Do not learn\n", "# use first reward function initially and then switch to second reward function\n", "\n", "# Instantiate learner in VW\n", "vw = pyvw.vw(\"--cb_explore_adf -q UA --quiet --epsilon 0.2\")\n", "\n", "num_iterations_per_cost_func = 5000\n", "cost_functions = [get_cost, get_cost_new1]\n", "total_iterations = num_iterations_per_cost_func * len(cost_functions)\n", "\n", "ctr = run_simulation_multiple_cost_functions(vw, num_iterations_per_cost_func, users, context1, context2, items, cost_functions, do_learn=False)\n", "plot_ctr(total_iterations, ctr)" ] } ], "metadata": { "colab": { "collapsed_sections": [], "name": "2021-06-12-vowpalwabbit-changing-context.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 4 }