{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Dueling network" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "First, we import all the necessary libraries:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings('ignore')\n", "import numpy as np\n", "import tensorflow as tf\n", "import gym\n", "from gym.spaces import Box\n", "from scipy.misc import imresize\n", "import random\n", "import cv2\n", "import time\n", "import logging\n", "import os\n", "import sys" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "\n", "Now we build our dueling deep Q network.\n", "We build three convolutional layers whose flattened output feeds two parallel\n", "512-unit fully connected layers: one heads the value stream and the other heads the\n", "advantage stream. A final aggregate layer combines the value stream and the\n", "advantage stream to compute the Q value. The dimensions of these layers are as follows:\n", "\n", "\n", "Layer 1: 32 8x8 filters with stride 4 + RELU
\n", "Layer 2: 64 4x4 filters with stride 2 + RELU
\n", "Layer 3: 64 3x3 filters with stride 1 + RELU
\n", "\n", "Layer 4a: 512 unit Fully-Connected layer + RELU
\n", "Layer 4b: 512 unit Fully-Connected layer + RELU
\n", "\n", "Layer 5a: 1 unit FC, linear / no activation (State Value)
\n", "Layer 5b: FC with one unit per action, linear / no activation (Advantage Value)
class QNetworkDueling():
    """Dueling deep Q-network built from raw TensorFlow variables.

    Three convolutional layers feed two parallel 512-unit fully connected
    layers. One of them heads the state-value stream V(s) (a single unit), the
    other heads the advantage stream A(s, a) (``output_size`` units). The two
    streams are combined as ``Q = V + (A - mean(A))``.

    The first filter bank has 4 input channels and the flattened convolution
    output is fixed at 7*7*64 values per sample. With VALID padding this is
    consistent with 84x84 frames (84 -> 20 -> 9 -> 7); other input sizes are
    not supported by the hard-coded fully connected weights.
    """

    # we define the init method for initializing all layers,

    def __init__(self, input_size, output_size, name):
        """Create all trainable variables of the network.

        Args:
            input_size: stored on the instance as-is; not used to size any layer.
            output_size: number of units in the advantage stream (one per action).
            name: variable scope under which every variable is created.
        """
        self.name = name
        self.input_size = input_size
        self.output_size = output_size

        # Size of the flattened output of the third convolutional layer.
        # Kept in one place because both FC weights and the reshape depend on it.
        self.flat_size = 7 * 7 * 64

        with tf.variable_scope(self.name):

            # Three convolutional layers
            # (filter shapes are [height, width, in_channels, out_channels])
            self.W_conv1 = self.weight_variable([8, 8, 4, 32])
            self.B_conv1 = self.bias_variable([32])
            self.stride1 = 4

            self.W_conv2 = self.weight_variable([4, 4, 32, 64])
            self.B_conv2 = self.bias_variable([64])
            self.stride2 = 2

            self.W_conv3 = self.weight_variable([3, 3, 64, 64])
            self.B_conv3 = self.bias_variable([64])
            self.stride3 = 1

            # fully connected layer 1 (feeds the value stream)
            self.W_fc4a = self.weight_variable([self.flat_size, 512])
            self.B_fc4a = self.bias_variable([512])

            # fully connected layer 2 (feeds the advantage stream)
            self.W_fc4b = self.weight_variable([self.flat_size, 512])
            self.B_fc4b = self.bias_variable([512])

            # value stream
            self.W_fc5a = self.weight_variable([512, 1])
            self.B_fc5a = self.bias_variable([1])

            # advantage stream
            self.W_fc5b = self.weight_variable([512, self.output_size])
            self.B_fc5b = self.bias_variable([self.output_size])

        # print number of parameters in the network
        self.print_num_of_parameters()

    # The notebook calls the three helpers below but never defines them, so
    # constructing the network used to fail with AttributeError. The initial
    # values chosen here are small defaults, not taken from the notebook.

    def weight_variable(self, shape):
        """Return a trainable weight variable of ``shape`` (truncated normal, stddev 0.01)."""
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial)

    def bias_variable(self, shape):
        """Return a trainable bias variable of ``shape`` filled with the constant 0.01."""
        initial = tf.constant(0.01, shape=shape)
        return tf.Variable(initial)

    def print_num_of_parameters(self):
        """Print and return the number of trainable scalars created under ``self.name``."""
        # The trailing '/' stops a network named 'q' from also counting the
        # variables of a sibling network named e.g. 'q_target'.
        scope_variables = tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name + '/')

        total_parameters = 0
        for variable in scope_variables:
            variable_parameters = 1
            for dim in variable.get_shape().as_list():
                variable_parameters *= dim
            total_parameters += variable_parameters

        print('# of parameters in network', self.name, ':', total_parameters)
        return total_parameters

    # Now we define the method called __call__ to perform the convolutional operation

    def __call__(self, input_tensor):
        """Build the forward pass and return the Q-value tensor.

        Args:
            input_tensor: a tensor, or a list/tuple of tensors which are
                concatenated along axis 1 before the first convolution.

        Returns:
            The aggregated Q-value tensor ``V + (A - mean(A))``, where the mean
            is taken over axis 1 of the advantage stream. Intermediate
            activations are stored on the instance (``h_conv1`` ... ``h_fc6``)
            and are overwritten by every call.
        """
        if isinstance(input_tensor, (list, tuple)):
            # TF >= 1.0 signature is tf.concat(values, axis); the former
            # tf.concat(1, values) order is rejected there.
            input_tensor = tf.concat(input_tensor, axis=1)

        with tf.variable_scope(self.name):

            # Perform convolutional operation on three layers
            self.h_conv1 = tf.nn.relu(
                tf.nn.conv2d(input_tensor, self.W_conv1,
                             strides=[1, self.stride1, self.stride1, 1],
                             padding='VALID') + self.B_conv1)
            self.h_conv2 = tf.nn.relu(
                tf.nn.conv2d(self.h_conv1, self.W_conv2,
                             strides=[1, self.stride2, self.stride2, 1],
                             padding='VALID') + self.B_conv2)
            self.h_conv3 = tf.nn.relu(
                tf.nn.conv2d(self.h_conv2, self.W_conv3,
                             strides=[1, self.stride3, self.stride3, 1],
                             padding='VALID') + self.B_conv3)

            # Flatten the convolutional output
            self.h_conv3_flat = tf.reshape(self.h_conv3, [-1, self.flat_size])

            # Input the flattened convolutional layer output to the fully connected layers
            self.h_fc4a = tf.nn.relu(tf.matmul(self.h_conv3_flat, self.W_fc4a) + self.B_fc4a)
            self.h_fc4b = tf.nn.relu(tf.matmul(self.h_conv3_flat, self.W_fc4b) + self.B_fc4b)

            # Compute value stream and advantage stream (both linear, no activation)
            self.h_fc5a_value = tf.identity(
                tf.matmul(self.h_fc4a, self.W_fc5a) + self.B_fc5a)
            self.h_fc5b_advantage = tf.identity(
                tf.matmul(self.h_fc4b, self.W_fc5b) + self.B_fc5b)

            # Combine both value and advantage stream to get the Q value.
            # Subtracting the per-sample mean advantage keeps the dimension so
            # it broadcasts against the advantage tensor.
            # keep_dims is the TF 1.x spelling (renamed keepdims in later releases).
            mean_advantage = tf.reduce_mean(
                self.h_fc5b_advantage, axis=1, keep_dims=True)
            self.h_fc6 = self.h_fc5a_value + (self.h_fc5b_advantage - mean_advantage)

        return self.h_fc6