{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np\n", "import theano\n", "import theano.tensor as T\n", "\n", "import lasagne\n", "from lasagne.layers.dnn import Conv2DDNNLayer as conv2d\n", "floatX = theano.config.floatX\n", "floatX" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from IPython.display import HTML, display" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%load_ext Cython" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%%cython\n", "# cython: infer_types=True, annotation_typing=True\n", "## cython: infer_types.verbose=True \n", "from IPython.display import HTML, display\n", "import numpy as np\n", "\n", "floatX = np.float32\n", "\n", "binary6 = np.array([ list(map(int,bin(2**6+i)[:2:-1])) for i in range(2**6)], dtype=floatX)\n", "height = np.array([-1]*65, dtype=np.int32)\n", "for __i in range(6):\n", " height[2**__i]=__i\n", "\n", "cdef class Connect4:\n", " cdef public:\n", " long turn\n", " long long[2] data\n", " cpdef long get_col_row(self, col: long, row: long):\n", " pos = col * 7 + row\n", " mask = (1) << pos \n", " if self.data[1] & mask:\n", " return 2\n", " return bool(self.data[0] & mask)\n", " \n", " cpdef long is_end(self):\n", " cdef long long mask\n", " bitboard = self.data[1-self.turn%2]\n", " bound = (1)<<48 # 49 = 7*(6+1) \n", " # horizontal: 0x204081 = 1|(1<<7)|(1<<14)|(1<<21)\n", " # vertical: 0xf = 1|(1<<1)|(1<<2)|(1<<3)\n", " # up-right: 0x1010101 = 1|(1<<8)|(1<<16)|(1<<24)\n", " # down-right: 0x208208 = (1<<3)|(1<<9)|(1<<15)|(1<<21)\n", " for mask in [0x204081, 0xf, 0x1010101, 0x208208]:\n", " while mask < bound:\n", " if mask & bitboard == mask:\n", " return True\n", " mask <<= 1\n", " return False\n", " \n", " cpdef set_col_row(self, col:long, row:long, value:long):\n", " # assert value in [0,1,2]\n", " pos = col * 7 + row\n", " mask = (1) << pos\n", " neg_mask = ~mask \n", " if value == 1 or value ==2:\n", " self.data[value-1] |= mask\n", " self.data[2-value] &= neg_mask\n", " else:\n", " self.data[0] &= neg_mask\n", " self.data[1] &= neg_mask\n", " \n", " def __init__(self, data=None, turn=0):\n", " if data is not None:\n", " self.data = data[:]\n", " else:\n", " self.data = [0, 0]\n", " self.turn = turn\n", " \n", " cpdef remove(self, col:long):\n", " shift = col*7\n", " mask = (((self.data[0]|self.data[1]) >> shift) &0x3f) +1\n", " mask = (mask >> 1) << shift\n", " # print(shift, hex(mask), hex(self.data[0]), hex(self.data[1]))\n", " neg_mask = ~mask\n", " self.data[0] &= neg_mask\n", " self.data[1] &= neg_mask\n", " \n", " def _np_branch(self):\n", " c = self.turn%2 # who's turn\n", " base = np.zeros((3,7,6), dtype=floatX)\n", " base[2] = 1\n", " pos = []\n", " moves = []\n", " red, yellow = self.data\n", " for i in range(7):\n", " mask = ((red|yellow) &0x3f) + 1\n", " p = height[mask]\n", " if p != -1:\n", " moves.append(i)\n", " pos.append(height[mask])\n", " base[c, i] = binary6[red&0x3f]\n", " base[1-c, i] = binary6[yellow&0x3f]\n", " red >>= 7\n", " yellow >>= 7\n", " boards = np.zeros( (len(moves), 3, 7, 6), dtype=floatX)\n", " for i in range(len(moves)):\n", " m = moves[i]\n", " p = pos[i]\n", " boards[i]=base\n", " boards[i, 0, m, p] = 1\n", " return moves, boards\n", " \n", " def _np_board(self):\n", " c = (self.turn-1)%2 # who played\n", " board = np.ones((3, 7, 6), dtype=floatX)\n", " red, yellow = self.data\n", " for i in range(7):\n", " mask = ((red|yellow) &0x3f) + 1\n", " board[c, i] = binary6[red&0x3f]\n", " board[1-c, i] = binary6[yellow&0x3f]\n", " red >>= 7\n", " yellow >>= 7\n", " return board\n", " \n", " \n", " cpdef move(self, col:long, test=False):\n", " # assert 0<= col <7\n", " shift = col*7\n", " mask = (((self.data[0]|self.data[1]) >> shift) &0x3f) +1\n", " # print(\"mask=\", mask)\n", " if mask >= 64:\n", " return None\n", " if not test:\n", " self.data[self.turn%2] |= (mask<\"\n", " header = \"\"\"
\"\"\"\n", " header += \"\\n\".join(imgstr%('empty', pos(5-j), pos(i), 0) for i in range(7) for j in range(6))\n", " return header +\"\\n\".join(imgstr%('red_coin' if c==1 else 'yellow_coin', pos(5-j), pos(i), 2) for (i,j,c) in self.board_data()) +\"
\"\n", " \n", " def display(self):\n", " display(HTML(self._repr_html_()))\n", " \n", " def __repr__(self):\n", " row_str = lambda j: \"\".join(\".ox\"[self.get_col_row(i,j)] for i in range(7))\n", " return \"\\n\".join(row_str(j) for j in range(5,-1,-1))\n", "\n", "from random import randint\n", "def random_play(init_data=None, init_turn=0, display=False):\n", " game = Connect4(init_data, init_turn)\n", " while game.turn < 42 and not game.is_end():\n", " while game.move(randint(0,6)) is None:\n", " continue\n", " if display:\n", " game.display()\n", " if game.is_end():\n", " return game.turn\n", " return 0" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def MC_agent(_game, N=200):\n", " score = [-1.0*N]*7\n", " for i in range(7):\n", " game = Connect4(_game.data, _game.turn)\n", " if game.move(i):\n", " if game.is_end():\n", " return i\n", " s = 0\n", " for j in range(N):\n", " #print(\"move\", i, \"case\", j)\n", " r = random_play(game.data, game.turn)\n", " turn = (r-1)%2\n", " if r == 0:\n", " pass\n", " elif (r-1)%2 == _game.turn%2:\n", " s += 0.95** (r-_game.turn-1)\n", " else:\n", " s -= .95** (r-_game.turn-1)\n", " score[i] = s/N\n", " return max(zip(score, range(7)))[1]" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def random_vs_MC(init_data=None, init_turn=0, display=False):\n", " game = Connect4(init_data, init_turn)\n", " while game.turn < 42 and not game.is_end():\n", " if game.turn%2 == 0:\n", " while game.move(randint(0,6)) is None:\n", " continue\n", " else:\n", " i = MC_agent(game)\n", " game.move(i)\n", " if display == 'all':\n", " game.display()\n", " if display:\n", " game.display()\n", " if game.is_end():\n", " return game.turn\n", " return 0" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "input_var = T.tensor4('inputs')\n", "target_var = T.vector('targets')\n", "l_in = lasagne.layers.InputLayer(shape=(None, 3, 7, 6), input_var=input_var)\n", "#l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)\n", "_ = conv2d(l_in, num_filters=400, filter_size = 5, pad='same')\n", "#_ = lasagne.layers.DropoutLayer(_, p=0.2)\n", "_ = conv2d(_, num_filters=200, filter_size = 3, pad='same')\n", "_ = conv2d(_, num_filters=100, filter_size = 3, pad='same')\n", "_ = conv2d(_, num_filters=50, filter_size = 3, pad='same')\n", "_ = conv2d(_, num_filters=25, filter_size = 3, pad='same')\n", "l_out = lasagne.layers.DenseLayer(_, num_units=1, nonlinearity=lasagne.nonlinearities.tanh, W=lasagne.init.GlorotUniform())\n", "\n", "prediction = lasagne.layers.get_output(l_out).flatten()\n", "V = theano.function([input_var], prediction)\n", "#loss = lasagne.objectives.binary_crossentropy(prediction, target_var)\n", "loss = lasagne.objectives.squared_error(prediction, target_var)\n", "\n", "loss = loss.mean()\n", "\n", "params = lasagne.layers.get_all_params(l_out, trainable=True)\n", "updates = lasagne.updates.adam(loss, params)\n", "train_fn = theano.function([input_var, target_var], loss, updates=updates)\n", "test_prediction = lasagne.layers.get_output(l_out, deterministic=True).flatten()\n", "test_V = theano.function([input_var], T.gt(test_prediction, 0.))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from random import random, randint\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def player_NN(game):\n", " moves, boards = game._np_branch()\n", " return moves[np.argmax(V(boards))]\n", "\n", "def player_random(game):\n", " while 1:\n", " r = randint(0,6)\n", " if game.move(r, test=True) is not None:\n", " return r\n", "\n", "def get_player_MC(N=100):\n", " def player(game):\n", " return MC_agent(game, N=N)\n", " return player\n", "\n", "def get_player_mixed(*settings):\n", " def player(game):\n", " r = random()\n", " for player, prop in settings:\n", " r-=prop\n", " if r<=0:\n", " return player(game)\n", " return player_random(game)\n", " return player\n", " \n", "\n", "def vs(player1, player2, display=False):\n", " game = Connect4()\n", " history = []\n", " while game.turn < 42 and not game.is_end():\n", " if game.turn%2 == 0:\n", " m = player1(game)\n", " else:\n", " m = player2(game)\n", " game.move(m)\n", " history.append(m)\n", " if display == 'all':\n", " game.display()\n", " if display:\n", " game.display()\n", " if game.is_end():\n", " return game.turn, history\n", " return 0, history\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def train_if(results):\n", " def train(r, history, old_histories, γ=0.95):\n", " _r = -1 if r ==0 else (r-1)%2\n", " if _r not in results:\n", " return\n", " game = Connect4()\n", " board_history = []\n", " for m in history:\n", " game.move(m)\n", " board_history.append (game._np_board())\n", " estimate_V = np.zeros(len(history), dtype=floatX)\n", " if _r==-1:\n", " r = 0.\n", " else:\n", " r = 1.\n", " for i in range(len(history)-1, -1, -1):\n", " estimate_V[i]=r\n", " r *= -γ\n", " #old_histories.append( (board_history, estimate_V) )\n", " data0 = board_history \n", " data1 = estimate_V \n", " loss = train_fn(data0, data1) \n", " return train\n", "\n", "def vs_test(player1, player2, old_histories=[], ngames=1000, train=None):\n", " result = [0,0,0]\n", " for i in range(ngames):\n", " r, history = vs(player1, player2)\n", " if r == 0:\n", " result[0]+=1\n", " else:\n", " result[1 + (r-1)%2]+=1\n", " if train is not None:\n", " train(r, history, old_histories)\n", " old_histories= old_histories[-20:]\n", " return result\n", "\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "train2 = train=train_if([-1,0])\n", "train1 = train=train_if([-1,1])\n", "train_all = train=train_if([-1,0,1])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import time\n", "import sys\n", "import pickle\n", "start_time=time.time()\n", "def run_game(V, verbose = False, ɛ=0.1, γ=0.95):\n", " game = Connect4()\n", " history=[]\n", " runtime_V=[]\n", " while game.turn < 42 and not game.is_end():\n", " s = 1 if game.turn%2 == 0 else -1\n", " moves, boards = game._np_branch()\n", " #print(boards)\n", " if random() < ɛ:\n", " idx = randint(0, len(moves)-1)\n", " values=[None]*len(moves)\n", " else:\n", " values = V(boards)\n", " idx = np.argmax(values)\n", " m = moves[idx]\n", " game.move(m)\n", " history.append(boards[idx])\n", " runtime_V.append(values[idx])\n", " if game.is_end():\n", " result = 1.\n", " else:\n", " result = 0. # Tie\n", " # train here\n", " #game.display()\n", " estimate_V = np.zeros(len(history), dtype=floatX)\n", " r = result\n", " for i in range(len(history)-1, -1, -1):\n", " estimate_V[i]=r\n", " r *= -γ\n", " loss = train_fn(np.array(history, dtype=floatX), estimate_V)\n", " return loss\n", "total_loss = 0\n", "N = 10000\n", "print(\"time:\",time.time()-start_time)\n", "print(\"inital result: mc100 vs nn: %s, nn vs mc100 %s\"%(vs_test(get_player_MC(100), player_NN, ngames=100), \n", " vs_test(player_NN, get_player_MC(100), ngames=100)))\n", "print(\"time:\",time.time()-start_time)\n", "for i in range(100*N):\n", " total_loss += run_game(V)\n", " if i%N==N-1:\n", " print(\"time:\",time.time()-start_time)\n", " if i%(10*N)==10*N-1:\n", " s1=(\"%d midterm avgloss=%f\"%(i+1, total_loss/N))\n", " s2=(\" mc100 vs nn: %s, nn vs mc100 %s\"%(vs_test(get_player_MC(100), player_NN, ngames=100, train=train2), \n", " vs_test(player_NN, get_player_MC(100), ngames=100, train=train1)) )\n", " print(s1)\n", " print(s2)\n", " values = lasagne.layers.get_all_param_values(l_out)\n", " pickle.dump(values,open(\"c4-conv3(%s%s).pkl\"%(s1,s2),\"wb\"))\n", " else:\n", " print(\"#%d avgloss=%f, rand vs nn: %s, nn vs rand %s\"%(i+1, total_loss/N, vs_test(player_random, player_NN, ngames=1000, train=train2), \n", " vs_test(player_NN, player_random, ngames=1000, train=train1)) )\n", " total_loss = 0\n", " sys.stdout.flush()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pickle\n", "values = lasagne.layers.get_all_param_values(l_out)\n", "pickle.dump(values,open(\"c4-conv.pkl\",\"wb\"))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# load\n", "import pickle\n", "values = pickle.load(open(\"c4-conv3(200000 midterm avgloss=0.216788 mc100 vs nn: [5, 18, 77], nn vs mc100 [4, 83, 13]).pkl\",\"rb\"))\n", "lasagne.layers.set_all_param_values(l_out, values)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "from IPython.display import clear_output\n", "game = Connect4()\n", "c = randint(0, 1)\n", "moves, boards = None, None\n", "while 1:\n", " clear_output()\n", " display(HTML(game._repr_html_()))\n", " if boards is not None:\n", " print(V(boards))\n", " if game.turn >= 42 or game.is_end():\n", " break\n", " if game.turn%2 == c:\n", " #game.move(MC_agent(game))\n", " #while game.move(randint(0,6)) is None: continue\n", " moves, boards = game._np_branch()\n", " game.move(moves[np.argmax(V(boards))])\n", " else:\n", " game.move(int(input(\"your turn\")))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3" } }, "nbformat": 4, "nbformat_minor": 0 }