{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Car Racing"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "\n",
    "\n",
    "So far we have seen how to build dueling deep q network. Now we will see how to make use of dueling DQN for playing the car racing game.\n",
    "\n",
    "First, let us import our necessary libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import gym\n",
    "import time\n",
    "import logging\n",
    "import os\n",
    "import sys\n",
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "Initialize all necessary variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "ENV_NAME = 'Seaquest-v0'\n",
    "TOTAL_FRAMES = 20000000\n",
    "MAX_TRAINING_STEPS = 20*60*60/3 \n",
    "TESTING_GAMES = 30 \n",
    "MAX_TESTING_STEPS = 5*60*60/3 \n",
    "TRAIN_AFTER_FRAMES = 50000\n",
    "epoch_size = 50000 \n",
    "MAX_NOOP_START = 30\n",
    "LOG_DIR = 'logs'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "logger = tf.train.SummaryWriter(LOG_DIR)\n",
    "\n",
    "# Intilaize tensorflow session\n",
    "session = tf.InteractiveSession()\n",
    "\n",
    "outdir = 'results'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    " Build the agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "agent = DQN(state_size=env.observation_space.shape,\n",
    "            action_size=env.action_space.n,\n",
    "            session=session,\n",
    "            summary_writer = logger,\n",
    "            exploration_period = 1000000,\n",
    "            minibatch_size = 32,\n",
    "            discount_factor = 0.99,\n",
    "            experience_replay_buffer = 1000000,\n",
    "            target_qnet_update_frequency = 20000, \n",
    "            initial_exploration_epsilon = 1.0,\n",
    "            final_exploration_epsilon = 0.1,\n",
    "            reward_clipping = 1.0,\n",
    "            DoubleDQN = UseDoubleDQN)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "Store the recording"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "session.run(tf.initialize_all_variables())\n",
    "logger.add_graph(session.graph)\n",
    "saver = tf.train.Saver(tf.all_variables())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "env.monitor.start(outdir+'/'+ENV_NAME,force = True, video_callable=multiples_video_schedule)\n",
    "\n",
    "num_frames = 0\n",
    "num_games = 0\n",
    "current_game_frames = 0\n",
    "init_no_ops = np.random.randint(MAX_NOOP_START+1)\n",
    "last_time = time.time()\n",
    "last_frame_count = 0.0\n",
    "state = env.reset()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    " Now let us training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "while num_frames <= TOTAL_FRAMES+1:\n",
    "    if test_mode:\n",
    "        env.render()\n",
    "\n",
    "    num_frames += 1\n",
    "    current_game_frames += 1\n",
    "\n",
    "    # Select the action given the curent state    \n",
    "    action = agent.action(state, training = True)\n",
    "\n",
    "    # Perform the action on the environment, receiver reward and move to the next state \n",
    "    next_state,reward,done,_ = env.step(action)\n",
    "\n",
    "    # store this transistion information in the experience replay buffer\n",
    "    if current_game_frames >= init_no_ops:\n",
    "        agent.store(state,action,reward,next_state,done)\n",
    "    state = next_state\n",
    "\n",
    "    # Train the agent\n",
    "    if num_frames>=TRAIN_AFTER_FRAMES:\n",
    "        agent.train()\n",
    "\n",
    "    if done or current_game_frames > MAX_TRAINING_STEPS:\n",
    "        state = env.reset()\n",
    "        current_game_frames = 0\n",
    "        num_games += 1\n",
    "        init_no_ops = np.random.randint(MAX_NOOP_START+1)\n",
    "\n",
    "\n",
    "    # Save the network's parameters after every epoch\n",
    "    if num_frames % epoch_size == 0  and  num_frames > TRAIN_AFTER_FRAMES:\n",
    "        saver.save(session, outdir+\"/\"+ENV_NAME+\"/model_\"+str(num_frames/1000)+\"k.ckpt\")\n",
    "        print \"epoch:  frames=\",num_frames,\"   games=\",num_games\n",
    "\n",
    "\n",
    "    # We test the performance for every two epochs\n",
    "    if num_frames % (2*epoch_size) == 0  and num_frames > TRAIN_AFTER_FRAMES:\n",
    "        total_reward = 0\n",
    "        avg_steps = 0\n",
    "        for i in xrange(TESTING_GAMES):\n",
    "            state = env.reset()\n",
    "            init_no_ops = np.random.randint(MAX_NOOP_START+1)\n",
    "            frm = 0\n",
    "            while frm < MAX_TESTING_STEPS:\n",
    "                frm += 1\n",
    "                env.render()\n",
    "                action = agent.action(state, training = False) \n",
    "\n",
    "                if current_game_frames < init_no_ops:\n",
    "                    action = 0\n",
    "\n",
    "                state,reward,done,_ = env.step(action)\n",
    "\n",
    "                total_reward += reward\n",
    "                if done:\n",
    "                    break\n",
    "\n",
    "            avg_steps += frm\n",
    "        avg_reward = float(total_reward)/TESTING_GAMES\n",
    "\n",
    "        str_ = session.run( tf.scalar_summary('test reward ('+str(epoch_size/1000)+'k)', avg_reward) )\n",
    "        logger.add_summary(str_, num_frames) \n",
    "        print '  --> Evaluation Average Reward: ',avg_reward,'   avg steps: ',(avg_steps/TESTING_GAMES)\n",
    "\n",
    "        state = env.reset()\n",
    "\n",
    "env.monitor.close()\n",
    "logger.close()\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:anaconda]",
   "language": "python",
   "name": "conda-env-anaconda-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}