{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "name": "Implementing a Contextual Bandit Using VW's Python Wrapper.ipynb",
   "version": "0.3.2",
   "views": {},
   "default_view": {},
   "provenance": [
    {
     "file_id": "1Njy1txYPXqVwueHudbkF40zTbwkui1xA",
     "timestamp": 1519781379506
    },
    {
     "file_id": "11qz1CSi8-8yQACKJzp3G8VPUqzB3V1Hw",
     "timestamp": 1519780916480
    }
   ],
   "collapsed_sections": []
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  }
 },
 "cells": [
  {
   "metadata": {
    "id": "FFk0grV-cDLo",
    "colab_type": "code",
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    }
   },
   "cell_type": "code",
   "source": [
    "# load required packages\n",
    "import pandas as pd\n",
    "import sklearn as sk\n",
    "import numpy as np"
   ],
   "execution_count": 0,
   "outputs": []
  },
  {
   "metadata": {
    "id": "OPlledP-8oo9",
    "colab_type": "code",
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "output_extras": [
      {
       "item_id": 8
      }
     ],
     "base_uri": "https://localhost:8080/",
     "height": 357
    },
    "outputId": "53fe3b47-619f-44c4-805f-88e1b26b35db",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1520197158295,
     "user_tz": 480,
     "elapsed": 7226,
     "user": {
      "displayName": "Julian Runge",
      "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
      "userId": "115854653587627279362"
     }
    }
   },
   "cell_type": "code",
   "source": [
    "# install vowpal wabbit\n",
    "# NOTE(review): the PyPI package `boost` appears unrelated to the Boost C++ libraries (the recorded output\n",
    "# below shows it depending on sqlalchemy and Mastodon.py); the apt-get line is what installs libboost -\n",
    "# confirm and consider dropping the first pip line\n",
    "!pip install boost\n",
    "!apt-get install libboost-program-options-dev zlib1g-dev libboost-python-dev -y\n",
    "!pip install vowpalwabbit"
   ],
   "execution_count": 5,
   "outputs": [
    {
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: boost in /usr/local/lib/python3.6/dist-packages\r\n",
      "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.6/dist-packages (from boost)\n",
      "Requirement already satisfied: Mastodon.py in /usr/local/lib/python3.6/dist-packages (from boost)\n",
      "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/dist-packages (from Mastodon.py->boost)\n",
      "Requirement already satisfied: decorator in /usr/local/lib/python3.6/dist-packages (from Mastodon.py->boost)\n",
      "Requirement already satisfied: pytz in /usr/local/lib/python3.6/dist-packages (from Mastodon.py->boost)\n",
      "Requirement already satisfied: requests in /usr/local/lib/python3.6/dist-packages (from Mastodon.py->boost)\n",
      "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from Mastodon.py->boost)\n",
      "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python3.6/dist-packages (from requests->Mastodon.py->boost)\n",
      "Requirement already satisfied: idna<2.7,>=2.5 in /usr/local/lib/python3.6/dist-packages (from requests->Mastodon.py->boost)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.6/dist-packages (from requests->Mastodon.py->boost)\n",
      "Requirement already satisfied: urllib3<1.23,>=1.21.1 in /usr/local/lib/python3.6/dist-packages (from requests->Mastodon.py->boost)\n",
      "Reading package lists... Done\n",
      "Building dependency tree \n",
      "Reading state information... Done\n",
      "libboost-program-options-dev is already the newest version (1.62.0.1).\n",
      "zlib1g-dev is already the newest version (1:1.2.11.dfsg-0ubuntu2).\n",
      "libboost-python-dev is already the newest version (1.62.0.1).\n",
      "0 upgraded, 0 newly installed, 0 to remove and 1 not upgraded.\n",
      "Requirement already satisfied: vowpalwabbit in /usr/local/lib/python3.6/dist-packages\n"
     ],
     "name": "stdout"
    }
   ]
  },
  {
   "metadata": {
    "id": "ySF3GkA8qe6z",
    "colab_type": "code",
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     }
    }
   },
   "cell_type": "code",
   "source": [
    "# generate sample data that could originate from previous random trial, e.g. AB test, for the CB to explore\n",
    "## data here are equivalent to example in https://github.com/JohnLangford/vowpal_wabbit/wiki/Contextual-Bandit-Example\n",
    "train_data = [{'action': 1, 'cost': 2, 'probability': 0.4, 'feature1': 'a', 'feature2': 'c', 'feature3': ''},\n",
    "              {'action': 3, 'cost': 0, 'probability': 0.2, 'feature1': 'b', 'feature2': 'd', 'feature3': ''},\n",
    "              {'action': 4, 'cost': 1, 'probability': 0.5, 'feature1': 'a', 'feature2': 'b', 'feature3': ''},\n",
    "              {'action': 2, 'cost': 1, 'probability': 0.3, 'feature1': 'a', 'feature2': 'b', 'feature3': 'c'},\n",
    "              {'action': 3, 'cost': 1, 'probability': 0.7, 'feature1': 'a', 'feature2': 'd', 'feature3': ''}]\n",
    "\n",
    "train_df = pd.DataFrame(train_data)\n",
    "\n",
    "## add index to df\n",
    "train_df['index'] = range(1, len(train_df) + 1)\n",
    "train_df = train_df.set_index(\"index\")\n",
    "\n",
    "# generate some test data that you want the CB to make decisions for, e.g. features describing new users, for the CB to exploit\n",
    "test_data = [{'feature1': 'b', 'feature2': 'c', 'feature3': ''},\n",
    "             {'feature1': 'a', 'feature2': '', 'feature3': 'b'},\n",
    "             {'feature1': 'b', 'feature2': 'b', 'feature3': ''},\n",
    "             {'feature1': 'a', 'feature2': '', 'feature3': 'b'}]\n",
    "\n",
    "test_df = pd.DataFrame(test_data)\n",
    "\n",
    "## add index to df\n",
    "test_df['index'] = range(1, len(test_df) + 1)\n",
    "test_df = test_df.set_index(\"index\")"
   ],
   "execution_count": 0,
   "outputs": []
  },
  {
   "metadata": {
    "id": "IDyiQVTJ4EBs",
    "colab_type": "code",
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "output_extras": [
      {
       "item_id": 1
      }
     ],
     "base_uri": "https://localhost:8080/",
     "height": 238
    },
    "outputId": "a5f7d5e7-7478-4c63-db7d-ca9e14ca259b",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1520198312440,
     "user_tz": 480,
     "elapsed": 598,
     "user": {
      "displayName": "Julian Runge",
      "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
      "userId": "115854653587627279362"
     }
    }
   },
   "cell_type": "code",
   "source": [
    "# take a look at dataframes\n",
    "print(train_df)\n",
    "print(test_df)"
   ],
   "execution_count": 39,
   "outputs": [
    {
     "output_type": "stream",
     "text": [
      " action cost feature1 feature2 feature3 probability\n",
      "index \n",
      "1 1 2 a c 0.4\n",
      "2 3 0 b d 0.2\n",
      "3 4 1 a b 0.5\n",
      "4 2 1 a b c 0.3\n",
      "5 3 1 a d 0.7\n",
      " feature1 feature2 feature3\n",
      "index \n",
      "1 b c \n",
      "2 a b\n",
      "3 b b \n",
      "4 a b\n"
     ],
     "name": "stdout"
    }
   ]
  },
  {
   "metadata": {
    "id": "pXLrjx6PjKio",
    "colab_type": "code",
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "output_extras": [
      {
       "item_id": 1
      }
     ],
     "base_uri": "https://localhost:8080/",
     "height": 85
    },
    "outputId": "897a674c-f076-4ba2-8978-f54a8eb9fbc7",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1520198318632,
     "user_tz": 480,
     "elapsed": 669,
     "user": {
      "displayName": "Julian Runge",
      "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
      "userId": "115854653587627279362"
     }
    }
   },
   "cell_type": "code",
   "source": [
    "# import vowpal wabbit's python wrapper\n",
    "from vowpalwabbit import pyvw\n",
    "\n",
    "# create python model - this stores the model parameters in the python vw object; here a contextual bandit with four possible actions\n",
    "vw = pyvw.vw(\"--cb 4\")\n",
    "\n",
    "# feature columns shared by train_df and test_df, in the order they are written into each example string\n",
    "FEATURE_COLUMNS = [\"feature1\", \"feature2\", \"feature3\"]\n",
    "\n",
    "\n",
    "def format_features(df, row_label):\n",
    "    \"\"\"Return the FEATURE_COLUMNS values of row `row_label` in `df`, joined by single spaces.\"\"\"\n",
    "    return \" \".join(str(df.loc[row_label, column]) for column in FEATURE_COLUMNS)\n",
    "\n",
    "\n",
    "# use the learn method to train the vw model, train model row by row using a loop\n",
    "# (the for statement binds the loop variable itself, so it needs no initialisation beforehand)\n",
    "for i in train_df.index:\n",
    "    ## provide data to cb in requested format: action:cost:probability | features\n",
    "    action = train_df.loc[i, \"action\"]\n",
    "    cost = train_df.loc[i, \"cost\"]\n",
    "    probability = train_df.loc[i, \"probability\"]\n",
    "    ## do the actual learning\n",
    "    vw.learn(str(action)+\":\"+str(cost)+\":\"+str(probability)+\" | \"+format_features(train_df, i))\n",
    "\n",
    "# use the same model object that was trained to perform predictions\n",
    "\n",
    "# predict row by row and output results\n",
    "for j in test_df.index:\n",
    "    choice = vw.predict(\"| \"+format_features(test_df, j))\n",
    "    print(j, choice)\n",
    "\n",
    "# the CB assigns every instance to action 3 as it should per the cost structure of the train data; you can play with the cost structure to see that the CB updates its predictions accordingly"
   ],
   "execution_count": 40,
   "outputs": [
    {
     "output_type": "stream",
     "text": [
      "1 3\n",
      "2 3\n",
      "3 3\n",
      "4 3\n"
     ],
     "name": "stdout"
    }
   ]
  },
  {
   "metadata": {
    "id": "60bK90HlThA2",
    "colab_type": "code",
    "colab": {
     "autoexec": {
      "startup": false,
      "wait_interval": 0
     },
     "output_extras": [
      {
       "item_id": 1
      }
     ],
     "base_uri": "https://localhost:8080/",
     "height": 34
    },
    "outputId": "845ef0c1-465c-4748-b5bb-6b9d332a6820",
    "executionInfo": {
     "status": "ok",
     "timestamp": 1520198381640,
     "user_tz": 480,
     "elapsed": 605,
     "user": {
      "displayName": "Julian Runge",
      "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s128",
      "userId": "115854653587627279362"
     }
    }
   },
   "cell_type": "code",
   "source": [
    "# BONUS: save and load the CB model\n",
    "# save model\n",
    "vw.save('cb.model')\n",
    "del vw\n",
    "# load from saved file\n",
    "vw = pyvw.vw(\"--cb 4 -i cb.model\")\n",
    "print(vw.predict('| a b'))"
   ],
   "execution_count": 41,
   "outputs": [
    {
     "output_type": "stream",
     "text": [
      "3\n"
     ],
     "name": "stdout"
    }
   ]
  }
 ]
}