{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Tictactoe\n",
    "\n",
    "Nbviewer [https://nbviewer.jupyter.org/github/shaundsouza/ai-ecosystems-enabling/blob/master/tictactoe.ipynb](https://nbviewer.jupyter.org/github/shaundsouza/ai-ecosystems-enabling/blob/master/tictactoe.ipynb)\n",
    "\n",
    "Execute on Binder [https://mybinder.org/v2/gh/shaundsouza/ai-ecosystems-enabling/master?filepath=tictactoe.ipynb](https://mybinder.org/v2/gh/shaundsouza/ai-ecosystems-enabling/master?filepath=tictactoe.ipynb)\n",
    "\n",
    "Code [https://nbviewer.jupyter.org/format/script/github/shaundsouza/ai-ecosystems-enabling/blob/master/tictactoe.ipynb](https://nbviewer.jupyter.org/format/script/github/shaundsouza/ai-ecosystems-enabling/blob/master/tictactoe.ipynb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import math\n",
    "import pickle\n",
    "import operator\n",
    "import random\n",
    "import time\n",
    "import sys\n",
    "\n",
    "\n",
    "class tictactoe:\n",
    "    \"\"\"Tic-tac-toe on an N x N board with a tabular state-value function.\n",
    "\n",
    "    The board is a flat NumPy object array of N**2 marks (``player1``,\n",
    "    ``player2`` or ``empty``). ``V`` maps a board snapshot, as returned by\n",
    "    ``pos_board``, to a value estimate.\n",
    "    \"\"\"\n",
    "\n",
    "    N = 3  # board side length\n",
    "    board = None  # flat object array of marks, created by init_board()\n",
    "    # Class-level default kept for backward compatibility; __init__ gives\n",
    "    # every instance its own table so instances do not share learned values.\n",
    "    V = dict()\n",
    "    alpha = 0.3  # step size of the value update in game_play_train()\n",
    "    epsilon = 0.2  # probability of a random move in game_play(user=False)\n",
    "    player1 = \"O\"\n",
    "    player2 = \"X\"\n",
    "    empty = \"-\"\n",
    "    player = None  # mark of the player whose turn it is\n",
    "    moves = None\n",
    "\n",
    "    def __init__(self):\n",
    "        # A mutable class attribute would be shared by all instances.\n",
    "        self.V = dict()\n",
    "\n",
    "    def init_board(self):\n",
    "        \"\"\"Clear the board, make sure V knows the empty position, pick who starts.\"\"\"\n",
    "        self.board = np.empty(self.N**2, dtype=object)\n",
    "        self.board[:] = self.empty\n",
    "        self.moves = set()\n",
    "        # setdefault (rather than seeding only an empty table) keeps training\n",
    "        # working when V was replaced by a table lacking the start position.\n",
    "        self.V.setdefault(self.pos_board(), 0.5)\n",
    "        self.player = random.choice([self.player1, self.player2])\n",
    "\n",
    "    def pos_board(self):\n",
    "        \"\"\"Return the board as a hashable tuple, used as the key into V.\"\"\"\n",
    "        return tuple(self.board)\n",
    "\n",
    "    def print_board(self):\n",
    "        \"\"\"Print the board as an N x N grid followed by a blank line.\"\"\"\n",
    "        board_2d = np.reshape(self.board, (self.N, self.N))\n",
    "        sys.stdout.flush()\n",
    "        print(board_2d, flush=True)\n",
    "        print()\n",
    "\n",
    "    def flip_player(self):\n",
    "        \"\"\"Return the mark of the player who is not currently to move.\"\"\"\n",
    "        if self.player == self.player1:\n",
    "            return self.player2\n",
    "        return self.player1\n",
    "\n",
    "    def _empty_cells(self):\n",
    "        \"\"\"Return the flat indices of all unoccupied cells.\"\"\"\n",
    "        return np.flatnonzero(self.board == self.empty)\n",
    "\n",
    "    def _random_move(self):\n",
    "        \"\"\"Return a uniformly random empty cell index as a plain int.\"\"\"\n",
    "        return int(np.random.choice(self._empty_cells()))\n",
    "\n",
    "    def game_win(self, player):\n",
    "        \"\"\"Report whether some row, column or diagonal is filled by one mark.\n",
    "\n",
    "        Args:\n",
    "            player: mark to test for, or None to test for either player.\n",
    "\n",
    "        Returns:\n",
    "            bool: True if a complete line of the requested mark(s) exists.\n",
    "        \"\"\"\n",
    "        board_2d = np.reshape(self.board, (self.N, self.N))\n",
    "        lines = list(board_2d) + list(board_2d.T)\n",
    "        lines.append(np.diagonal(board_2d))\n",
    "        lines.append(np.diagonal(np.fliplr(board_2d)))\n",
    "\n",
    "        marks = (self.player1, self.player2) if player is None else (player,)\n",
    "        # Compare marks directly instead of summing character codes.\n",
    "        return any(\n",
    "            all(cell == mark for cell in line)\n",
    "            for mark in marks\n",
    "            for line in lines\n",
    "        )\n",
    "\n",
    "    def game_play_train(self):\n",
    "        \"\"\"Play one game of uniformly random moves and update V along the way.\n",
    "\n",
    "        After every move the value of the previous position is moved a\n",
    "        fraction ``alpha`` towards the value of the new position. A winning\n",
    "        position is fixed at 1, a drawn full board at 0, and any other\n",
    "        unseen position starts at 0.5.\n",
    "\n",
    "        Returns:\n",
    "            int: number of moves played before the final one.\n",
    "        \"\"\"\n",
    "        self.init_board()\n",
    "        count = 0\n",
    "\n",
    "        while True:\n",
    "            s = self.pos_board()\n",
    "            self.board[self._random_move()] = self.player\n",
    "            s_ = self.pos_board()\n",
    "\n",
    "            end_game = True\n",
    "            if self.game_win(None):\n",
    "                # Terminal positions are never the target of the update\n",
    "                # below, so a stored value must still be the terminal one.\n",
    "                assert self.V.get(s_, 1) == 1\n",
    "                self.V[s_] = 1\n",
    "            elif not len(self._empty_cells()):\n",
    "                assert self.V.get(s_, 0) == 0\n",
    "                self.V[s_] = 0\n",
    "            else:\n",
    "                end_game = False\n",
    "                self.V.setdefault(s_, 0.5)\n",
    "\n",
    "            # NOTE(review): a win is valued 1 no matter which player made it,\n",
    "            # so V does not separate own wins from opponent wins - confirm\n",
    "            # this is the intended learning target.\n",
    "            self.V[s] = self.V[s] + self.alpha * (self.V[s_] - self.V[s])\n",
    "\n",
    "            if end_game:\n",
    "                break\n",
    "\n",
    "            self.player = self.flip_player()\n",
    "            count = count + 1\n",
    "\n",
    "        return count\n",
    "\n",
    "    def value_move(self, player):\n",
    "        \"\"\"Return the empty cell whose resulting position has the highest value.\n",
    "\n",
    "        Args:\n",
    "            player: mark placed on each candidate cell before looking up V.\n",
    "\n",
    "        Returns:\n",
    "            int: flat board index. Ties are broken uniformly at random and\n",
    "            positions missing from V count as 0.5.\n",
    "        \"\"\"\n",
    "        candidates = self._empty_cells()\n",
    "        values = []\n",
    "        for i in candidates:\n",
    "            after = list(self.board)\n",
    "            after[i] = player\n",
    "            values.append(self.V.get(tuple(after), 0.5))\n",
    "\n",
    "        best = max(values)\n",
    "        best_moves = [i for i, v in zip(candidates, values) if v == best]\n",
    "        return int(random.choice(best_moves))\n",
    "\n",
    "    def _read_user_move(self):\n",
    "        \"\"\"Prompt until the user enters ``row col`` (zero-based) of an empty cell.\"\"\"\n",
    "        while True:\n",
    "            fields = input(\"Enter valid move? \").split()\n",
    "            try:\n",
    "                row, col = (int(field) for field in fields)\n",
    "            except ValueError:\n",
    "                # Wrong number of fields or a non-integer field: ask again.\n",
    "                continue\n",
    "            # Check row and column separately; checking only the flattened\n",
    "            # index would accept e.g. ``0 5`` as row 1, column 2.\n",
    "            if 0 <= row < self.N and 0 <= col < self.N:\n",
    "                pos = self.N * row + col\n",
    "                if self.board[pos] == self.empty:\n",
    "                    return pos\n",
    "\n",
    "    def game_play(self, user = False):\n",
    "        \"\"\"Play one game, printing the board after every move.\n",
    "\n",
    "        Args:\n",
    "            user: if True, ``player2`` moves are read from standard input as\n",
    "                ``row col`` and ``player1`` picks moves with value_move().\n",
    "                If False, both sides use value_move() except for a random\n",
    "                move with probability ``epsilon``.\n",
    "\n",
    "        Returns:\n",
    "            int: number of moves played before the final one.\n",
    "        \"\"\"\n",
    "        self.init_board()\n",
    "        count = 0\n",
    "\n",
    "        while True:\n",
    "            if user and self.player == self.player2:\n",
    "                pos = self._read_user_move()\n",
    "            elif not user and random.random() < self.epsilon:\n",
    "                pos = self._random_move()\n",
    "            else:\n",
    "                pos = self.value_move(self.player)\n",
    "\n",
    "            self.board[pos] = self.player\n",
    "            self.print_board()\n",
    "\n",
    "            if self.game_win(None):\n",
    "                print(\"Player Win\", self.player)\n",
    "                break\n",
    "            if not len(self._empty_cells()):\n",
    "                print(\"Draw\")\n",
    "                break\n",
    "\n",
    "            self.player = self.flip_player()\n",
    "            count = count + 1\n",
    "\n",
    "        return count\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    a = tictactoe()\n",
    "\n",
    "    # Set to False to skip training and reuse an existing V.pkl.\n",
    "    train = True\n",
    "    if train:\n",
    "        count = 0\n",
    "        N = 10000\n",
    "        for x in range(1, N + 1):\n",
    "            count = count + a.game_play_train()\n",
    "            if x % 1000 == 0:\n",
    "                print(\"Train \", x)\n",
    "\n",
    "        print(\"Avg \", count / N)\n",
    "\n",
    "        # Context managers close the file even if pickling fails.\n",
    "        with open('V.pkl', 'wb') as output:\n",
    "            pickle.dump(a.V, output)\n",
    "\n",
    "    # pickle.load can execute arbitrary code: only load a V.pkl you created.\n",
    "    with open('V.pkl', 'rb') as pkl_file:\n",
    "        a.V = pickle.load(pkl_file)\n",
    "\n",
    "    count = 0\n",
    "    N = 1\n",
    "    for i in range(N):\n",
    "        count = count + a.game_play(True)\n",
    "\n",
    "    print(\"Avg \", count / N)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}