{ "cells": [ { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Car Racing" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "\n", "\n", "So far we have seen how to build dueling deep q network. Now we will see how to make use of dueling DQN for playing the car racing game.\n", "\n", "First, let us import our necessary libraries" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import gym\n", "import time\n", "import logging\n", "import os\n", "import sys\n", "import tensorflow as tf" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "Initialize all necessary variables" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ENV_NAME = 'Seaquest-v0'\n", "TOTAL_FRAMES = 20000000\n", "MAX_TRAINING_STEPS = 20*60*60/3 \n", "TESTING_GAMES = 30 \n", "MAX_TESTING_STEPS = 5*60*60/3 \n", "TRAIN_AFTER_FRAMES = 50000\n", "epoch_size = 50000 \n", "MAX_NOOP_START = 30\n", "LOG_DIR = 'logs'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "logger = tf.train.SummaryWriter(LOG_DIR)\n", "\n", "# Intilaize tensorflow session\n", "session = tf.InteractiveSession()\n", "\n", "outdir = 'results'" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ " Build the agent" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "agent = DQN(state_size=env.observation_space.shape,\n", " action_size=env.action_space.n,\n", " session=session,\n", " summary_writer = logger,\n", " exploration_period = 1000000,\n", " minibatch_size = 32,\n", " discount_factor = 0.99,\n", " experience_replay_buffer = 1000000,\n", " target_qnet_update_frequency = 20000, \n", " initial_exploration_epsilon = 1.0,\n", " final_exploration_epsilon = 0.1,\n", " reward_clipping = 1.0,\n", " DoubleDQN = UseDoubleDQN)\n" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "Store the recording" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "session.run(tf.initialize_all_variables())\n", "logger.add_graph(session.graph)\n", "saver = tf.train.Saver(tf.all_variables())" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "env.monitor.start(outdir+'/'+ENV_NAME,force = True, video_callable=multiples_video_schedule)\n", "\n", "num_frames = 0\n", "num_games = 0\n", "current_game_frames = 0\n", "init_no_ops = np.random.randint(MAX_NOOP_START+1)\n", "last_time = time.time()\n", "last_frame_count = 0.0\n", "state = env.reset()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " Now let us training" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "while num_frames <= TOTAL_FRAMES+1:\n", " if test_mode:\n", " env.render()\n", "\n", " num_frames += 1\n", " current_game_frames += 1\n", "\n", " # Select the action given the curent state \n", " action = agent.action(state, training = True)\n", "\n", " # Perform the action on the environment, receiver reward and move to the next state \n", " next_state,reward,done,_ = env.step(action)\n", "\n", " # store this transistion information in the experience replay buffer\n", " if current_game_frames >= init_no_ops:\n", " agent.store(state,action,reward,next_state,done)\n", " state = next_state\n", "\n", " # Train the agent\n", " if num_frames>=TRAIN_AFTER_FRAMES:\n", " agent.train()\n", "\n", " if done or current_game_frames > MAX_TRAINING_STEPS:\n", " state = env.reset()\n", " current_game_frames = 0\n", " num_games += 1\n", " init_no_ops = np.random.randint(MAX_NOOP_START+1)\n", "\n", "\n", " # Save the network's parameters after every epoch\n", " if num_frames % epoch_size == 0 and num_frames > TRAIN_AFTER_FRAMES:\n", " saver.save(session, outdir+\"/\"+ENV_NAME+\"/model_\"+str(num_frames/1000)+\"k.ckpt\")\n", " print \"epoch: frames=\",num_frames,\" games=\",num_games\n", "\n", "\n", " # We test the performance for every two epochs\n", " if num_frames % (2*epoch_size) == 0 and num_frames > TRAIN_AFTER_FRAMES:\n", " total_reward = 0\n", " avg_steps = 0\n", " for i in xrange(TESTING_GAMES):\n", " state = env.reset()\n", " init_no_ops = np.random.randint(MAX_NOOP_START+1)\n", " frm = 0\n", " while frm < MAX_TESTING_STEPS:\n", " frm += 1\n", " env.render()\n", " action = agent.action(state, training = False) \n", "\n", " if current_game_frames < init_no_ops:\n", " action = 0\n", "\n", " state,reward,done,_ = env.step(action)\n", "\n", " total_reward += reward\n", " if done:\n", " break\n", "\n", " avg_steps += frm\n", " avg_reward = float(total_reward)/TESTING_GAMES\n", "\n", " str_ = session.run( tf.scalar_summary('test reward ('+str(epoch_size/1000)+'k)', avg_reward) )\n", " logger.add_summary(str_, num_frames) \n", " print ' --> Evaluation Average Reward: ',avg_reward,' avg steps: ',(avg_steps/TESTING_GAMES)\n", "\n", " state = env.reset()\n", "\n", "env.monitor.close()\n", "logger.close()\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:anaconda]", "language": "python", "name": "conda-env-anaconda-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.11" } }, "nbformat": 4, "nbformat_minor": 2 }