{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from matplotlib import pyplot as plt\n",
    "from matplotlib import animation\n",
    "import imageio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Sender:\n",
    "        \n",
    "    def __init__(self, n_inputs: int, n_messages: int, eps: float = 1e-6):\n",
    "        self.n_messages = n_messages\n",
    "        self.message_weights = np.zeros((n_inputs, n_messages))\n",
    "        self.message_weights.fill(eps)\n",
    "        self.last_situation = (0, 0)\n",
    "        \n",
    "    def send_message(self, input: int) -> int:\n",
    "        probs = np.exp(self.message_weights[input, :])/np.sum(np.exp(self.message_weights[input, :]))\n",
    "        message = np.random.choice(self.n_messages, p=probs)\n",
    "        self.last_situation = (input, message)\n",
    "        return message\n",
    "\n",
    "    def learn_from_feedback(self, reward: int) -> None:\n",
    "        self.message_weights[self.last_situation] += reward"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Receiver:\n",
    "        \n",
    "    def __init__(self, n_messages: int, n_actions: int, eps: float = 1e-6):\n",
    "        self.n_actions = n_actions\n",
    "        self.action_weights = np.ndarray((n_messages, n_actions))\n",
    "        self.action_weights.fill(eps)\n",
    "        self.last_situation = (0, 0)\n",
    "        \n",
    "    def act(self, message: int) -> int:\n",
    "        probs = np.exp(self.action_weights[message, :])/np.sum(np.exp(self.action_weights[message, :]))\n",
    "        action = np.random.choice(self.n_actions, p=probs)\n",
    "        self.last_situation = (message, action)\n",
    "        return action\n",
    "\n",
    "    def learn_from_feedback(self, reward: int) -> None:\n",
    "        self.action_weights[self.last_situation] += reward"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "class World:\n",
    "    def __init__(self, n_states: int, seed: int = 1701):\n",
    "        self.n_states = n_states\n",
    "        self.state = 0\n",
    "        self.rng = np.random.RandomState(seed)\n",
    "        \n",
    "    def emit_state(self) -> int:\n",
    "        self.state = self.rng.randint(self.n_states)\n",
    "        return self.state\n",
    "    \n",
    "    def evaluate_action(self, action: int) -> int:\n",
    "        return 1 if action == self.state else -1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 0, last 100 epochs reward: -0.01\n",
      "4 4 0 -1\n",
      "Epoch 100, last 100 epochs reward: -0.74\n",
      "1 7 2 -1\n",
      "Epoch 200, last 100 epochs reward: -0.56\n",
      "9 8 9 1\n",
      "Epoch 300, last 100 epochs reward: -0.7\n",
      "9 8 9 1\n",
      "Epoch 400, last 100 epochs reward: -0.7\n",
      "6 8 9 -1\n",
      "Epoch 500, last 100 epochs reward: -0.5\n",
      "4 7 6 -1\n",
      "Epoch 600, last 100 epochs reward: -0.44\n",
      "8 4 0 -1\n",
      "Epoch 700, last 100 epochs reward: -0.42\n",
      "4 2 6 -1\n",
      "Epoch 800, last 100 epochs reward: -0.1\n",
      "7 9 2 -1\n",
      "Epoch 900, last 100 epochs reward: -0.22\n",
      "1 5 6 -1\n",
      "Epoch 1000, last 100 epochs reward: -0.02\n",
      "4 6 4 1\n",
      "Epoch 1100, last 100 epochs reward: 0.34\n",
      "6 5 6 1\n",
      "Epoch 1200, last 100 epochs reward: 0.28\n",
      "9 2 6 -1\n",
      "Epoch 1300, last 100 epochs reward: 0.54\n",
      "7 6 4 -1\n",
      "Epoch 1400, last 100 epochs reward: 0.54\n",
      "4 6 4 1\n",
      "Epoch 1500, last 100 epochs reward: 0.62\n",
      "7 8 6 -1\n",
      "Epoch 1600, last 100 epochs reward: 0.7\n",
      "6 5 6 1\n",
      "Epoch 1700, last 100 epochs reward: 0.62\n",
      "2 9 2 1\n",
      "Epoch 1800, last 100 epochs reward: 0.86\n",
      "4 6 4 1\n",
      "Epoch 1900, last 100 epochs reward: 0.82\n",
      "9 2 9 1\n",
      "Epoch 2000, last 100 epochs reward: 0.78\n",
      "2 9 2 1\n",
      "Epoch 2100, last 100 epochs reward: 0.94\n",
      "0 0 0 1\n",
      "Epoch 2200, last 100 epochs reward: 1.0\n",
      "3 1 3 1\n",
      "Epoch 2300, last 100 epochs reward: 1.0\n",
      "8 4 8 1\n",
      "Epoch 2400, last 100 epochs reward: 1.0\n",
      "1 3 1 1\n",
      "Epoch 2500, last 100 epochs reward: 1.0\n",
      "8 4 8 1\n",
      "Epoch 2600, last 100 epochs reward: 1.0\n",
      "2 9 2 1\n",
      "Epoch 2700, last 100 epochs reward: 1.0\n",
      "8 4 8 1\n",
      "Epoch 2800, last 100 epochs reward: 1.0\n",
      "6 5 6 1\n",
      "Epoch 2900, last 100 epochs reward: 1.0\n",
      "6 5 6 1\n",
      "Observation to message mapping:\n",
      "[0 3 9 1 6 8 5 7 4 2]\n",
      "Message to action mapping:\n",
      "[0 3 9 1 8 6 4 7 5 2]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<Figure size 432x288 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "sender, receiver = Sender(10, 10), Receiver(10, 10)\n",
    "world = World(10)\n",
    "past_rewards = 0\n",
    "matrices = []\n",
    "for epoch in range(3000):\n",
    "    world_state = world.emit_state()\n",
    "    message = sender.send_message(world_state)\n",
    "    action = receiver.act(message)\n",
    "    reward = world.evaluate_action(action)\n",
    "    receiver.learn_from_feedback(reward)\n",
    "    sender.learn_from_feedback(reward)\n",
    "    past_rewards += reward\n",
    "    if epoch % 25 == 0:\n",
    "        plt.tight_layout(pad=0)\n",
    "        plot = sns.heatmap(\n",
    "            np.exp(receiver.action_weights)/np.exp(receiver.action_weights).sum(axis=0), \n",
    "            square=True, cbar=False, annot=True, fmt='.1f'\n",
    "        ).get_figure()\n",
    "        plt.xlabel('messages')\n",
    "        plt.ylabel('actions')\n",
    "        plt.title(f'Receiver\\'s weights, rollout {epoch}')\n",
    "        plt.savefig(f\"receiver_{epoch}.png\")\n",
    "        plt.clf()\n",
    "        \n",
    "        plot = sns.heatmap(\n",
    "            np.exp(sender.message_weights)/np.exp(sender.message_weights).sum(axis=0), \n",
    "            square=True, cbar=False,annot=True, fmt='.1f'\n",
    "        ).get_figure()\n",
    "        plt.xlabel('world states')\n",
    "        plt.ylabel('messages')\n",
    "        plt.title(f'Sender\\'s weights, rollout {epoch}')\n",
    "        plt.savefig(f\"sender_{epoch}.png\")\n",
    "        plt.clf()\n",
    "           \n",
    "    if epoch % 100 == 0:\n",
    "        print(f'Epoch {epoch}, last 100 epochs reward: {past_rewards/100}')\n",
    "        print(world_state, message, action, reward)\n",
    "        past_rewards = 0\n",
    "\n",
    "print(\"Observation to message mapping:\")\n",
    "print(sender.message_weights.argmax(1))\n",
    "print(\"Message to action mapping:\")\n",
    "print(receiver.action_weights.argmax(1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_gif(filename_base):\n",
    "    images = []\n",
    "    for filename in [f'{filename_base}_{i}.png' for i in range(3000) if i % 25 == 0]:\n",
    "        images.append(imageio.imread(filename))\n",
    "    imageio.mimsave(f'{filename_base}.gif', images)\n",
    "    \n",
    "make_gif('sender')\n",
    "make_gif('receiver')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}