{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tictactoe\n",
    "\n",    
    "Nbviewer [https://nbviewer.jupyter.org/github/shaundsouza/ai-ecosystems-enabling/blob/master/tictactoe.ipynb](https://nbviewer.jupyter.org/github/shaundsouza/ai-ecosystems-enabling/blob/master/tictactoe.ipynb)\n",
    "\n",
	"Execute on Binder [https://mybinder.org/v2/gh/shaundsouza/ai-ecosystems-enabling/master?filepath=tictactoe.ipynb](https://mybinder.org/v2/gh/shaundsouza/ai-ecosystems-enabling/master?filepath=tictactoe.ipynb)\n",
	"\n",
    "Code [https://nbviewer.jupyter.org/format/script/github/shaundsouza/ai-ecosystems-enabling/blob/master/tictactoe.ipynb](https://nbviewer.jupyter.org/format/script/github/shaundsouza/ai-ecosystems-enabling/blob/master/tictactoe.ipynb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import math\n",
    "import pickle\n",
    "import operator\n",
    "import random\n",
    "import time\n",
    "import sys\n",
    "\n",
    "class tictactoe:\n",
    "    N = 3\n",
    "    board = None\n",
    "    V = dict()\n",
    "    alpha = 0.3\n",
    "    epsilon = 0.2\n",
    "    player1 = \"O\"\n",
    "    player2 = \"X\"\n",
    "    empty = \"-\"\n",
    "    player = None\n",
    "    moves = None\n",
    "\n",
    "    def init_board(self):\n",
    "        self.board = np.empty(self.N**2, dtype=object)\n",
    "        self.board[:] = '-'\n",
    "        self.moves = set()\n",
    "        if not self.V:\n",
    "            self.V[self.pos_board()] = 0.5\n",
    "        self.player = random.choice([self.player1, self.player2])\n",
    "\n",
    "    def pos_board(self):\n",
    "        # return np.array_str(self.board)[1:-1].strip()\n",
    "        return tuple(self.board)\n",
    "\n",
    "    def print_board(self):\n",
    "        board_2d = np.reshape(self.board, (self.N, self.N))\n",
    "        sys.stdout.flush()\n",
    "        print(board_2d, flush=True)\n",
    "        print()\n",
    "\n",
    "    def flip_player(self):\n",
    "        if self.player == self.player1:\n",
    "            return self.player2\n",
    "        else:\n",
    "            return self.player1\n",
    "\n",
    "    def game_win(self, player):\n",
    "        board_2d = np.reshape([ord(item) for item in self.board], (self.N, self.N))\n",
    "        # self.print_board()\n",
    "        row = np.sum(board_2d, axis=0)\n",
    "        col = np.sum(board_2d, axis=1)\n",
    "\n",
    "        # if player == -1:\n",
    "        #     if row.min() == -3 or col.min() == -3:\n",
    "        #         return \"Win\"\n",
    "        # else:\n",
    "        #     if row.max() == 3 or col.max() == 3:\n",
    "        #         return \"Win\"\n",
    "\n",
    "        return np.any(row == ord(self.player1) * self.N) or np.any(col == ord(self.player1) * self.N)\\\n",
    "               or np.any(row == ord(self.player2) * self.N) or np.any(col == ord(self.player2) * self.N)\\\n",
    "               or np.trace(board_2d) == ord(self.player1) * self.N or np.trace(board_2d) == ord(self.player2) * self.N\\\n",
    "               or np.trace(np.fliplr(board_2d)) == ord(self.player1) * self.N or np.trace(np.fliplr(board_2d)) == ord(self.player2) * self.N\n",
    "\n",
    "    def game_play_train(self):\n",
    "        self.init_board()\n",
    "        count = 0\n",
    "\n",
    "        while True:\n",
    "            s = self.pos_board()\n",
    "            end_game = False\n",
    "            pos = np.random.randint(self.N**2, size=1)\n",
    "            while self.board[pos] != self.empty:\n",
    "                pos = np.random.randint(self.N**2, size=1)\n",
    "\n",
    "            self.board[pos] = self.player\n",
    "            # self.print_board()\n",
    "\n",
    "            s_ = self.pos_board()\n",
    "            # print(pos)\n",
    "\n",
    "            if self.game_win(None):\n",
    "                if s_ in self.V:\n",
    "                    assert self.V[s_] == 1\n",
    "                self.V[s_] = 1\n",
    "                # self.V[s] = self.V[s] + self.alpha * (self.V[s_] - self.V[s])\n",
    "                # print(\"Win Player \" + str(self.player))\n",
    "                end_game = True\n",
    "            elif not len(np.where(self.board == self.empty)[0]):\n",
    "                if s_ in self.V:\n",
    "                    assert self.V[s_] == 0\n",
    "                self.V[s_] = 0\n",
    "                # self.V[s] = self.V[s] + self.alpha * (self.V[s_] - self.V[s])\n",
    "                # print(\"Draw\")\n",
    "                end_game = True\n",
    "            elif not s_ in self.V:\n",
    "                self.V[s_] = 0.5\n",
    "\n",
    "            self.V[s] = self.V[s] + self.alpha * (self.V[s_] - self.V[s])\n",
    "\n",
    "            if end_game:\n",
    "                break\n",
    "\n",
    "            self.player = self.flip_player()\n",
    "            count = count + 1\n",
    "            if count % 1000 == 0:\n",
    "                print(\"Train\", count)\n",
    "\n",
    "        return count\n",
    "\n",
    "    def value_move(self, player):\n",
    "        b = np.array(self.pos_board())\n",
    "        # print(b)\n",
    "\n",
    "        m = []\n",
    "        m_V = []\n",
    "        ii = np.where(b == self.empty)\n",
    "        # print(ii)\n",
    "        # print(player)\n",
    "\n",
    "        for i in ii[0]:\n",
    "            c = list(b)\n",
    "            c[i] = player\n",
    "            # print(i, c)\n",
    "            m.append(i)\n",
    "            try:\n",
    "                m_V.append(self.V[tuple(c)])\n",
    "            except:\n",
    "                m_V.append(0.5)\n",
    "\n",
    "        m = np.array(m)\n",
    "        m_V = np.array(m_V)\n",
    "        # print(m, m_V)\n",
    "        # print(max(m_V))\n",
    "\n",
    "        ii = np.where(m_V == max(m_V))\n",
    "        # print(ii)\n",
    "        # print(random.choice(m[ii[0]]))\n",
    "\n",
    "        return random.choice(m[ii])\n",
    "\n",
    "    def game_play(self, user = False):\n",
    "        self.init_board()\n",
    "        count = 0\n",
    "\n",
    "        while True:\n",
    "            s = self.pos_board()\n",
    "            end_game = False\n",
    "\n",
    "            if user:\n",
    "                if self.player == self.player2:\n",
    "                    pos = -1\n",
    "                    while pos < 0 or pos >= self.N**2 or self.board[pos] != self.empty:\n",
    "                        # print(pos)\n",
    "                        pos_str = input(\"Enter valid move? \")\n",
    "                        pos_i = pos_str.split()\n",
    "                        pos = self.N * int(pos_i[0]) + int(pos_i[1])\n",
    "                else:\n",
    "                    pos = self.value_move(self.player)\n",
    "            else:\n",
    "                pos = np.random.randint(self.N**2, size=1)\n",
    "                if random.random() < self.epsilon:\n",
    "                    while self.board[pos] != self.empty:\n",
    "                        # print(pos)\n",
    "                        pos = np.random.randint(self.N**2, size=1)\n",
    "                else:\n",
    "                    pos = self.value_move(self.player)\n",
    "\n",
    "            # print(pos)\n",
    "            self.board[pos] = self.player\n",
    "            self.print_board()\n",
    "\n",
    "            # print(self.game_win(None))\n",
    "            # print(self.board,np.where(self.board == 0))\n",
    "            if self.game_win(None):\n",
    "                print(\"Player Win\", self.player)\n",
    "                end_game = True\n",
    "            elif not len(np.where(self.board == self.empty)[0]):\n",
    "                print(\"Draw\")\n",
    "                end_game = True\n",
    "\n",
    "            if end_game:\n",
    "                break\n",
    "\n",
    "            self.player = self.flip_player()\n",
    "            count = count + 1\n",
    "            # time.sleep(.5)\n",
    "\n",
    "        return count\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    a = tictactoe()\n",
    "\n",
    "    if True:\n",
    "        count = 0\n",
    "        N = 10000\n",
    "        x = 0\n",
    "        for i in range(N):\n",
    "            count = count + a.game_play_train()\n",
    "            x = x + 1\n",
    "            if x % 1000 == 0:\n",
    "                print(\"Train \", x)\n",
    "\n",
    "        print(\"Avg \", count / N)\n",
    "\n",
    "        output = open('V.pkl', 'wb')\n",
    "        pickle.dump(a.V, output)\n",
    "        output.close()\n",
    "\n",
    "    pkl_file = open('V.pkl', 'rb')\n",
    "    a.V = pickle.load(pkl_file)\n",
    "\n",
    "    # for i in a.V:\n",
    "    #     print(i, a.V[i])\n",
    "\n",
    "    count = 0\n",
    "    N = 1\n",
    "    for i in range(N):\n",
    "        count = count + a.game_play(True)\n",
    "\n",
    "    print(\"Avg \", count / N)\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}