{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Contextual Bandits" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "FFk0grV-cDLo" }, "outputs": [], "source": [ "import vowpalwabbit\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 } }, "colab_type": "code", "id": "ySF3GkA8qe6z" }, "outputs": [], "source": [ "# generate sample data that could originate from previous random trial, e.g. AB test, for the CB to explore\n", "## data here are equivalent to example in https://github.com/VowpalWabbit/vowpal_wabbit/wiki/Logged-Contextual-Bandit-Example\n", "train_data = [\n", " {\n", " \"action\": 1,\n", " \"cost\": 2,\n", " \"probability\": 0.4,\n", " \"feature1\": \"a\",\n", " \"feature2\": \"c\",\n", " \"feature3\": \"\",\n", " },\n", " {\n", " \"action\": 3,\n", " \"cost\": 0,\n", " \"probability\": 0.2,\n", " \"feature1\": \"b\",\n", " \"feature2\": \"d\",\n", " \"feature3\": \"\",\n", " },\n", " {\n", " \"action\": 4,\n", " \"cost\": 1,\n", " \"probability\": 0.5,\n", " \"feature1\": \"a\",\n", " \"feature2\": \"b\",\n", " \"feature3\": \"\",\n", " },\n", " {\n", " \"action\": 2,\n", " \"cost\": 1,\n", " \"probability\": 0.3,\n", " \"feature1\": \"a\",\n", " \"feature2\": \"b\",\n", " \"feature3\": \"c\",\n", " },\n", " {\n", " \"action\": 3,\n", " \"cost\": 1,\n", " \"probability\": 0.7,\n", " \"feature1\": \"a\",\n", " \"feature2\": \"d\",\n", " \"feature3\": \"\",\n", " },\n", "]\n", "\n", "train_df = pd.DataFrame(train_data)\n", "\n", "## add index to df\n", "train_df[\"index\"] = range(1, len(train_df) + 1)\n", "train_df = train_df.set_index(\"index\")\n", "\n", "# generate some test data that you want the CB to make decisions for, e.g. features describing new users, for the CB to exploit\n", "test_data = [\n", " {\"feature1\": \"b\", \"feature2\": \"c\", \"feature3\": \"\"},\n", " {\"feature1\": \"a\", \"feature2\": \"\", \"feature3\": \"b\"},\n", " {\"feature1\": \"b\", \"feature2\": \"b\", \"feature3\": \"\"},\n", " {\"feature1\": \"a\", \"feature2\": \"\", \"feature3\": \"b\"},\n", "]\n", "\n", "test_df = pd.DataFrame(test_data)\n", "\n", "## add index to df\n", "test_df[\"index\"] = range(1, len(test_df) + 1)\n", "test_df = test_df.set_index(\"index\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 238, "output_extras": [ { "item_id": 1 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 598, "status": "ok", "timestamp": 1520198312440, "user": { "displayName": "Julian Runge", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", "userId": "115854653587627279362" }, "user_tz": 480 }, "id": "IDyiQVTJ4EBs", "outputId": "a5f7d5e7-7478-4c63-db7d-ca9e14ca259b" }, "outputs": [], "source": [ "# take a look at dataframes\n", "print(train_df)\n", "print(test_df)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 85, "output_extras": [ { "item_id": 1 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 669, "status": "ok", "timestamp": 1520198318632, "user": { "displayName": "Julian Runge", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", "userId": "115854653587627279362" }, "user_tz": 480 }, "id": "pXLrjx6PjKio", "outputId": "897a674c-f076-4ba2-8978-f54a8eb9fbc7" }, "outputs": [], "source": [ "# create python model - this stores the model parameters in the python vw object; here a contextual bandit with four possible actions\n", "vw = vowpalwabbit.Workspace(\"--cb 4\", quiet=True)\n", "\n", "# use the learn method to train the vw model, train model row by row using a loop\n", "for i in train_df.index:\n", " ## provide data to cb in requested format\n", " action = train_df.loc[i, \"action\"]\n", " cost = train_df.loc[i, \"cost\"]\n", " probability = train_df.loc[i, \"probability\"]\n", " feature1 = train_df.loc[i, \"feature1\"]\n", " feature2 = train_df.loc[i, \"feature2\"]\n", " feature3 = train_df.loc[i, \"feature3\"]\n", " ## do the actual learning\n", " vw.learn(\n", " str(action)\n", " + \":\"\n", " + str(cost)\n", " + \":\"\n", " + str(probability)\n", " + \" | \"\n", " + str(feature1)\n", " + \" \"\n", " + str(feature2)\n", " + \" \"\n", " + str(feature3)\n", " )\n", "\n", "# use the same model object that was trained to perform predictions\n", "\n", "# predict row by row and output results\n", "for j in test_df.index:\n", " feature1 = test_df.loc[j, \"feature1\"]\n", " feature2 = test_df.loc[j, \"feature2\"]\n", " feature3 = test_df.loc[j, \"feature3\"]\n", " choice = vw.predict(\n", " \"| \" + str(feature1) + \" \" + str(feature2) + \" \" + str(feature3)\n", " )\n", " print(j, choice)\n", "\n", "# the CB assigns every instance to action 3 as it should per the cost structure of the train data; you can play with the cost structure to see that the CB updates its predictions accordingly" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "autoexec": { "startup": false, "wait_interval": 0 }, "base_uri": "https://localhost:8080/", "height": 34, "output_extras": [ { "item_id": 1 } ] }, "colab_type": "code", "executionInfo": { "elapsed": 605, "status": "ok", "timestamp": 1520198381640, "user": { "displayName": "Julian Runge", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128", "userId": "115854653587627279362" }, "user_tz": 480 }, "id": "60bK90HlThA2", "outputId": "845ef0c1-465c-4748-b5bb-6b9d332a6820" }, "outputs": [], "source": [ "# BONUS: save and load the CB model\n", "# save model\n", "vw.save(\"cb.model\")\n", "del vw\n", "# load from saved file\n", "vw = vowpalwabbit.Workspace(\"--cb 4 -i cb.model\", quiet=True)\n", "print(vw.predict(\"| a b\"))" ] } ], "metadata": { "colab": { "collapsed_sections": [], "default_view": {}, "name": "Implementing a Contextual Bandit Using VW's Python Wrapper.ipynb", "provenance": [ { "file_id": "1Njy1txYPXqVwueHudbkF40zTbwkui1xA", "timestamp": 1519781379506 }, { "file_id": "11qz1CSi8-8yQACKJzp3G8VPUqzB3V1Hw", "timestamp": 1519780916480 } ], "version": "0.3.2", "views": {} }, "interpreter": { "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a" }, "kernelspec": { "display_name": "Python 3.8.5 64-bit", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.5" } }, "nbformat": 4, "nbformat_minor": 1 }