{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# dqn PacMan" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "import gym\n", "import numpy as np \n", "\n", "from keras.models import Sequential\n", "from keras.layers import Dense, Activation, Flatten\n", "from keras.optimizers import Adam\n", "import matplotlib.pyplot as plt\n", "\n", "from rl.agents.dqn import DQNAgent\n", "from rl.agents.ddpg import DDPGAgent\n", "from rl.policy import BoltzmannGumbelQPolicy \n", "from rl.memory import SequentialMemory" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "env = gym.make('MsPacman-v0')\n", "nb_actions = env.action_space.n\n", "nb_actions" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "flatten_6 (Flatten) (None, 100800) 0 \n", "_________________________________________________________________\n", "dense_43 (Dense) (None, 3) 302403 \n", "_________________________________________________________________\n", "dense_44 (Dense) (None, 9) 36 \n", "_________________________________________________________________\n", "dense_45 (Dense) (None, 6) 60 \n", "_________________________________________________________________\n", "dense_46 (Dense) (None, 9) 63 \n", "_________________________________________________________________\n", "dense_47 (Dense) (None, 6) 60 \n", "_________________________________________________________________\n", "dense_48 (Dense) (None, 9) 63 \n", "_________________________________________________________________\n", "dense_49 (Dense) (None, 6) 60 \n", 
"_________________________________________________________________\n", "dense_50 (Dense) (None, 9) 63 \n", "_________________________________________________________________\n", "dense_51 (Dense) (None, 3) 30 \n", "_________________________________________________________________\n", "dense_52 (Dense) (None, 9) 36 \n", "_________________________________________________________________\n", "dense_53 (Dense) (None, 3) 30 \n", "_________________________________________________________________\n", "dense_54 (Dense) (None, 9) 36 \n", "_________________________________________________________________\n", "activation_6 (Activation) (None, 9) 0 \n", "=================================================================\n", "Total params: 302,940\n", "Trainable params: 302,940\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "None\n" ] } ], "source": [ "# Next, we build a neural network model\n", "model = Sequential()\n", "model.add(Flatten(input_shape=(1,) + env.observation_space.shape))\n", "model.add(Dense(3, activation= 'tanh')) # layer 1: 3 cells with tanh activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Dense(6, activation= 'sigmoid')) #layer 2 : 6 cells with sigmoid activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Dense(6, activation= 'sigmoid')) #layer 3 : 6 cells with sigmoid activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Dense(6, activation= 'sigmoid')) #layer 4 : 6 cells with sigmoid activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Dense(3, activation= 'tanh')) #layer 5 : 3 cells with tanh activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Dense(3, activation= 'sigmoid')) #layer 6 : 6 cells with sigmoid activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Activation('softmax')) # one layer of 1 unit with sigmoid activation function\n", "print(model.summary())" ] }, { "cell_type": "code", 
"execution_count": 33, "metadata": {}, "outputs": [], "source": [ "#DQN -- Deep Reinforcement Learning \n", "\n", "#Configure and compile the agent. \n", "#Use every built-in Keras optimizer and metrics!\n", "memory = SequentialMemory(limit=100000, window_length=1)\n", "policy = BoltzmannGumbelQPolicy()\n", "dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,\n", " target_model_update=1e-2, policy=policy)\n", "dqn.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training for 100000 steps ...\n", " 922/100000: episode: 1, duration: 29.546s, episode steps: 922, steps per second: 31, episode reward: 370.000, mean reward: 0.401 [0.000, 10.000], mean action: 3.359 [0.000, 8.000], mean observation: 72.595 [0.000, 228.000], loss: 2.750501, mean_absolute_error: 0.159018, acc: 0.409802, mean_q: 0.655422\n", " 1355/100000: episode: 2, duration: 12.858s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.009 [1.000, 6.000], mean observation: 72.910 [0.000, 228.000], loss: 1.932692, mean_absolute_error: 0.100334, acc: 0.736865, mean_q: 0.939665\n", " 1794/100000: episode: 3, duration: 12.995s, episode steps: 439, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.030 [0.000, 7.000], mean observation: 72.906 [0.000, 228.000], loss: 1.599364, mean_absolute_error: 0.077912, acc: 0.806948, mean_q: 0.964618\n", " 2230/100000: episode: 4, duration: 13.087s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.018 [0.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 1.493305, mean_absolute_error: 0.067161, acc: 0.844252, mean_q: 0.977191\n", " 2695/100000: episode: 5, duration: 19.184s, episode steps: 465, steps per second: 24, 
episode reward: 110.000, mean reward: 0.237 [0.000, 10.000], mean action: 3.015 [0.000, 8.000], mean observation: 72.873 [0.000, 228.000], loss: 1.364146, mean_absolute_error: 0.060264, acc: 0.862769, mean_q: 0.984469\n", " 3244/100000: episode: 6, duration: 17.063s, episode steps: 549, steps per second: 32, episode reward: 60.000, mean reward: 0.109 [0.000, 10.000], mean action: 3.002 [0.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 1.354492, mean_absolute_error: 0.054776, acc: 0.885986, mean_q: 0.989604\n", " 3764/100000: episode: 7, duration: 16.936s, episode steps: 520, steps per second: 31, episode reward: 110.000, mean reward: 0.212 [0.000, 10.000], mean action: 3.033 [2.000, 8.000], mean observation: 72.857 [0.000, 228.000], loss: 1.285808, mean_absolute_error: 0.049860, acc: 0.902103, mean_q: 0.992924\n", " 4286/100000: episode: 8, duration: 16.436s, episode steps: 522, steps per second: 32, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.021 [0.000, 8.000], mean observation: 72.854 [0.000, 228.000], loss: 1.222823, mean_absolute_error: 0.046015, acc: 0.913374, mean_q: 0.995000\n", " 4818/100000: episode: 9, duration: 16.453s, episode steps: 532, steps per second: 32, episode reward: 110.000, mean reward: 0.207 [0.000, 10.000], mean action: 2.994 [0.000, 6.000], mean observation: 72.854 [0.000, 228.000], loss: 1.269515, mean_absolute_error: 0.045624, acc: 0.919290, mean_q: 0.996452\n", " 5240/100000: episode: 10, duration: 13.077s, episode steps: 422, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.012 [2.000, 8.000], mean observation: 72.910 [0.000, 228.000], loss: 1.165863, mean_absolute_error: 0.041262, acc: 0.929354, mean_q: 0.997315\n", " 5660/100000: episode: 11, duration: 12.973s, episode steps: 420, steps per second: 32, episode reward: 60.000, mean reward: 0.143 [0.000, 10.000], mean action: 3.007 [0.000, 8.000], mean observation: 72.908 [0.000, 228.000], 
loss: 1.207981, mean_absolute_error: 0.041799, acc: 0.931696, mean_q: 0.997878\n", " 6188/100000: episode: 12, duration: 17.165s, episode steps: 528, steps per second: 31, episode reward: 110.000, mean reward: 0.208 [0.000, 10.000], mean action: 3.019 [0.000, 8.000], mean observation: 72.858 [0.000, 228.000], loss: 1.079953, mean_absolute_error: 0.037732, acc: 0.937855, mean_q: 0.998365\n", " 7318/100000: episode: 13, duration: 36.109s, episode steps: 1130, steps per second: 31, episode reward: 180.000, mean reward: 0.159 [0.000, 50.000], mean action: 3.014 [1.000, 8.000], mean observation: 72.859 [0.000, 228.000], loss: 1.126516, mean_absolute_error: 0.036324, acc: 0.942893, mean_q: 0.998921\n", " 7749/100000: episode: 14, duration: 13.283s, episode steps: 431, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.021 [3.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 1.242681, mean_absolute_error: 0.034535, acc: 0.947288, mean_q: 0.999283\n", " 8193/100000: episode: 15, duration: 13.465s, episode steps: 444, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 1.276073, mean_absolute_error: 0.034827, acc: 0.949747, mean_q: 0.999426\n", " 8626/100000: episode: 16, duration: 13.100s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.912 [0.000, 228.000], loss: 1.219086, mean_absolute_error: 0.032687, acc: 0.953161, mean_q: 0.999536\n", " 9058/100000: episode: 17, duration: 13.008s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.014 [1.000, 8.000], mean observation: 72.917 [0.000, 228.000], loss: 0.966979, mean_absolute_error: 0.031524, acc: 0.955295, mean_q: 0.999627\n", " 9568/100000: episode: 18, duration: 15.549s, 
episode steps: 510, steps per second: 33, episode reward: 110.000, mean reward: 0.216 [0.000, 10.000], mean action: 2.994 [1.000, 4.000], mean observation: 72.859 [0.000, 228.000], loss: 0.906498, mean_absolute_error: 0.029293, acc: 0.959559, mean_q: 0.999698\n", " 9996/100000: episode: 19, duration: 13.345s, episode steps: 428, steps per second: 32, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.993 [0.000, 5.000], mean observation: 72.908 [0.000, 228.000], loss: 0.958007, mean_absolute_error: 0.029338, acc: 0.957579, mean_q: 0.999756\n", " 10419/100000: episode: 20, duration: 13.323s, episode steps: 423, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.005 [0.000, 8.000], mean observation: 72.906 [0.000, 228.000], loss: 1.070048, mean_absolute_error: 0.031253, acc: 0.959146, mean_q: 0.999802\n", " 10851/100000: episode: 21, duration: 13.485s, episode steps: 432, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.002 [2.000, 5.000], mean observation: 72.909 [0.000, 228.000], loss: 1.149229, mean_absolute_error: 0.031137, acc: 0.961010, mean_q: 0.999839\n", " 11429/100000: episode: 22, duration: 17.292s, episode steps: 578, steps per second: 33, episode reward: 110.000, mean reward: 0.190 [0.000, 10.000], mean action: 3.012 [0.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 1.168064, mean_absolute_error: 0.029637, acc: 0.962695, mean_q: 0.999876\n", " 11944/100000: episode: 23, duration: 15.214s, episode steps: 515, steps per second: 34, episode reward: 110.000, mean reward: 0.214 [0.000, 10.000], mean action: 2.992 [0.000, 4.000], mean observation: 72.853 [0.000, 228.000], loss: 1.067216, mean_absolute_error: 0.030874, acc: 0.961650, mean_q: 0.999906\n", " 12384/100000: episode: 24, duration: 13.070s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.993 [0.000, 
3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.935310, mean_absolute_error: 0.026834, acc: 0.966264, mean_q: 0.999924\n", " 12965/100000: episode: 25, duration: 17.462s, episode steps: 581, steps per second: 33, episode reward: 70.000, mean reward: 0.120 [0.000, 10.000], mean action: 3.014 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 1.057993, mean_absolute_error: 0.028709, acc: 0.966652, mean_q: 0.999940\n", " 13391/100000: episode: 26, duration: 12.881s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.865644, mean_absolute_error: 0.026880, acc: 0.966623, mean_q: 0.999953\n", " 13919/100000: episode: 27, duration: 15.738s, episode steps: 528, steps per second: 34, episode reward: 110.000, mean reward: 0.208 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.854 [0.000, 228.000], loss: 1.009845, mean_absolute_error: 0.028659, acc: 0.966974, mean_q: 0.999963\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 14350/100000: episode: 28, duration: 12.788s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 0.837868, mean_absolute_error: 0.025444, acc: 0.970780, mean_q: 0.999970\n", " 14775/100000: episode: 29, duration: 12.599s, episode steps: 425, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.012 [3.000, 6.000], mean observation: 72.913 [0.000, 228.000], loss: 0.885915, mean_absolute_error: 0.027345, acc: 0.966765, mean_q: 0.999976\n", " 15202/100000: episode: 30, duration: 12.630s, episode steps: 427, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [0.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 1.067898, 
mean_absolute_error: 0.027376, acc: 0.970653, mean_q: 0.999980\n", " 15628/100000: episode: 31, duration: 12.637s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.991 [1.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 0.926451, mean_absolute_error: 0.027283, acc: 0.971097, mean_q: 0.999984\n", " 16057/100000: episode: 32, duration: 12.722s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.804952, mean_absolute_error: 0.024675, acc: 0.971372, mean_q: 0.999987\n", " 16588/100000: episode: 33, duration: 15.744s, episode steps: 531, steps per second: 34, episode reward: 110.000, mean reward: 0.207 [0.000, 10.000], mean action: 3.013 [2.000, 8.000], mean observation: 72.854 [0.000, 228.000], loss: 0.925594, mean_absolute_error: 0.027047, acc: 0.972575, mean_q: 0.999990\n", " 17105/100000: episode: 34, duration: 15.288s, episode steps: 517, steps per second: 34, episode reward: 60.000, mean reward: 0.116 [0.000, 10.000], mean action: 3.008 [3.000, 5.000], mean observation: 72.920 [0.000, 228.000], loss: 0.892844, mean_absolute_error: 0.025035, acc: 0.971893, mean_q: 0.999992\n", " 17536/100000: episode: 35, duration: 12.722s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.023 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.973951, mean_absolute_error: 0.025876, acc: 0.975348, mean_q: 0.999994\n", " 17974/100000: episode: 36, duration: 13.189s, episode steps: 438, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.910 [0.000, 228.000], loss: 0.845903, mean_absolute_error: 0.023325, acc: 0.974244, mean_q: 0.999995\n", " 18405/100000: episode: 37, duration: 12.907s, episode steps: 
431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.005 [0.000, 8.000], mean observation: 72.917 [0.000, 228.000], loss: 0.885737, mean_absolute_error: 0.025633, acc: 0.974985, mean_q: 0.999996\n", " 18834/100000: episode: 38, duration: 12.750s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.005 [2.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.966934, mean_absolute_error: 0.028101, acc: 0.971882, mean_q: 0.999997\n", " 19299/100000: episode: 39, duration: 13.883s, episode steps: 465, steps per second: 33, episode reward: 110.000, mean reward: 0.237 [0.000, 10.000], mean action: 3.004 [1.000, 6.000], mean observation: 72.894 [0.000, 228.000], loss: 0.950698, mean_absolute_error: 0.025855, acc: 0.973790, mean_q: 0.999997\n", " 19725/100000: episode: 40, duration: 12.708s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.016 [3.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 0.934569, mean_absolute_error: 0.025027, acc: 0.975719, mean_q: 0.999998\n", " 20161/100000: episode: 41, duration: 12.911s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.966219, mean_absolute_error: 0.025600, acc: 0.975989, mean_q: 0.999998\n", " 20590/100000: episode: 42, duration: 12.721s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.912 [0.000, 228.000], loss: 0.831869, mean_absolute_error: 0.024158, acc: 0.976034, mean_q: 0.999999\n", " 21022/100000: episode: 43, duration: 12.844s, episode steps: 432, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.021 [3.000, 8.000], mean 
observation: 72.912 [0.000, 228.000], loss: 1.055637, mean_absolute_error: 0.027234, acc: 0.977937, mean_q: 0.999999\n", " 21455/100000: episode: 44, duration: 12.819s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.021 [3.000, 7.000], mean observation: 72.909 [0.000, 228.000], loss: 0.836770, mean_absolute_error: 0.023797, acc: 0.978493, mean_q: 0.999999\n", " 21889/100000: episode: 45, duration: 12.940s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.912 [0.000, 228.000], loss: 0.836639, mean_absolute_error: 0.023856, acc: 0.977823, mean_q: 0.999999\n", " 22343/100000: episode: 46, duration: 13.496s, episode steps: 454, steps per second: 34, episode reward: 110.000, mean reward: 0.242 [0.000, 10.000], mean action: 3.009 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.805641, mean_absolute_error: 0.023271, acc: 0.977905, mean_q: 0.999999\n", " 22777/100000: episode: 47, duration: 12.854s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [0.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 1.006345, mean_absolute_error: 0.026670, acc: 0.974942, mean_q: 1.000000\n", " 23214/100000: episode: 48, duration: 12.924s, episode steps: 437, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.844489, mean_absolute_error: 0.023875, acc: 0.979334, mean_q: 1.000000\n", " 23641/100000: episode: 49, duration: 12.627s, episode steps: 427, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.991 [1.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.983037, mean_absolute_error: 0.026869, acc: 0.979435, mean_q: 1.000000\n", " 
24067/100000: episode: 50, duration: 12.623s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.009 [3.000, 6.000], mean observation: 72.912 [0.000, 228.000], loss: 0.934281, mean_absolute_error: 0.025823, acc: 0.979020, mean_q: 1.000000\n", " 24494/100000: episode: 51, duration: 12.634s, episode steps: 427, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.988 [0.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.891939, mean_absolute_error: 0.025250, acc: 0.977532, mean_q: 1.000000\n", " 24932/100000: episode: 52, duration: 12.988s, episode steps: 438, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.876506, mean_absolute_error: 0.024347, acc: 0.979880, mean_q: 1.000000\n", " 25362/100000: episode: 53, duration: 12.794s, episode steps: 430, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.817384, mean_absolute_error: 0.022920, acc: 0.980959, mean_q: 1.000000\n", " 25784/100000: episode: 54, duration: 12.561s, episode steps: 422, steps per second: 34, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.002 [1.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 0.859725, mean_absolute_error: 0.023460, acc: 0.982968, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 26222/100000: episode: 55, duration: 12.969s, episode steps: 438, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.877860, mean_absolute_error: 0.020972, acc: 0.981949, mean_q: 1.000000\n", " 26654/100000: episode: 56, duration: 12.859s, episode steps: 432, steps 
per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 0.893530, mean_absolute_error: 0.025140, acc: 0.978516, mean_q: 1.000000\n", " 27184/100000: episode: 57, duration: 15.685s, episode steps: 530, steps per second: 34, episode reward: 110.000, mean reward: 0.208 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.849 [0.000, 228.000], loss: 0.871484, mean_absolute_error: 0.024115, acc: 0.980837, mean_q: 1.000000\n", " 27647/100000: episode: 58, duration: 13.676s, episode steps: 463, steps per second: 34, episode reward: 110.000, mean reward: 0.238 [0.000, 10.000], mean action: 2.998 [1.000, 4.000], mean observation: 72.896 [0.000, 228.000], loss: 0.882798, mean_absolute_error: 0.022712, acc: 0.981776, mean_q: 1.000000\n", " 28083/100000: episode: 59, duration: 12.928s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.912 [0.000, 228.000], loss: 0.924782, mean_absolute_error: 0.024951, acc: 0.982870, mean_q: 1.000000\n", " 28515/100000: episode: 60, duration: 13.170s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [1.000, 5.000], mean observation: 72.909 [0.000, 228.000], loss: 0.950630, mean_absolute_error: 0.025596, acc: 0.982784, mean_q: 1.000000\n", " 28957/100000: episode: 61, duration: 13.095s, episode steps: 442, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 1.017386, mean_absolute_error: 0.025839, acc: 0.980345, mean_q: 1.000000\n", " 29391/100000: episode: 62, duration: 12.831s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 
72.917 [0.000, 228.000], loss: 0.819406, mean_absolute_error: 0.023005, acc: 0.980703, mean_q: 1.000000\n", " 29824/100000: episode: 63, duration: 12.812s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 0.993344, mean_absolute_error: 0.026588, acc: 0.982318, mean_q: 1.000000\n", " 30267/100000: episode: 64, duration: 13.512s, episode steps: 443, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 2.989 [0.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.812653, mean_absolute_error: 0.022780, acc: 0.980954, mean_q: 1.000000\n", " 30707/100000: episode: 65, duration: 13.171s, episode steps: 440, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.907 [0.000, 228.000], loss: 0.804780, mean_absolute_error: 0.023003, acc: 0.979119, mean_q: 1.000000\n", " 31137/100000: episode: 66, duration: 12.918s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.958653, mean_absolute_error: 0.022369, acc: 0.983358, mean_q: 1.000000\n", " 31567/100000: episode: 67, duration: 13.041s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.842303, mean_absolute_error: 0.023308, acc: 0.981904, mean_q: 1.000000\n", " 31999/100000: episode: 68, duration: 12.833s, episode steps: 432, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.969986, mean_absolute_error: 0.024267, acc: 0.983362, mean_q: 1.000000\n", " 32434/100000: episode: 
69, duration: 12.959s, episode steps: 435, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.018 [3.000, 7.000], mean observation: 72.910 [0.000, 228.000], loss: 0.943249, mean_absolute_error: 0.023929, acc: 0.981968, mean_q: 1.000000\n", " 32860/100000: episode: 70, duration: 12.618s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.005 [1.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 0.853826, mean_absolute_error: 0.022960, acc: 0.984815, mean_q: 1.000000\n", " 33301/100000: episode: 71, duration: 13.135s, episode steps: 441, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.945814, mean_absolute_error: 0.023591, acc: 0.983985, mean_q: 1.000000\n", " 33738/100000: episode: 72, duration: 12.972s, episode steps: 437, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.995 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.853012, mean_absolute_error: 0.023421, acc: 0.982194, mean_q: 1.000000\n", " 34169/100000: episode: 73, duration: 12.839s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.002 [0.000, 7.000], mean observation: 72.910 [0.000, 228.000], loss: 0.837158, mean_absolute_error: 0.022482, acc: 0.985426, mean_q: 1.000000\n", " 34595/100000: episode: 74, duration: 12.850s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.005 [2.000, 6.000], mean observation: 72.906 [0.000, 228.000], loss: 0.903527, mean_absolute_error: 0.022658, acc: 0.983715, mean_q: 1.000000\n", " 35022/100000: episode: 75, duration: 12.748s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean 
action: 2.991 [0.000, 4.000], mean observation: 72.911 [0.000, 228.000], loss: 0.792673, mean_absolute_error: 0.021522, acc: 0.984778, mean_q: 1.000000\n", " 35448/100000: episode: 76, duration: 12.698s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [0.000, 6.000], mean observation: 72.908 [0.000, 228.000], loss: 0.867522, mean_absolute_error: 0.023318, acc: 0.983788, mean_q: 1.000000\n", " 35875/100000: episode: 77, duration: 12.782s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.835425, mean_absolute_error: 0.021034, acc: 0.984265, mean_q: 1.000000\n", " 36306/100000: episode: 78, duration: 12.901s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.007 [3.000, 5.000], mean observation: 72.917 [0.000, 228.000], loss: 0.875452, mean_absolute_error: 0.022229, acc: 0.982961, mean_q: 1.000000\n", " 36740/100000: episode: 79, duration: 12.985s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.947098, mean_absolute_error: 0.025119, acc: 0.984087, mean_q: 1.000000\n", " 37166/100000: episode: 80, duration: 12.678s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.748542, mean_absolute_error: 0.020739, acc: 0.984522, mean_q: 1.000000\n", " 37612/100000: episode: 81, duration: 13.315s, episode steps: 446, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 2.996 [1.000, 3.000], mean observation: 72.920 [0.000, 228.000], loss: 0.752258, mean_absolute_error: 0.020643, acc: 
0.984725, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 38046/100000: episode: 82, duration: 12.966s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.746647, mean_absolute_error: 0.020482, acc: 0.985383, mean_q: 1.000000\n", " 38482/100000: episode: 83, duration: 12.969s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.007 [2.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.842648, mean_absolute_error: 0.022776, acc: 0.984088, mean_q: 1.000000\n", " 38998/100000: episode: 84, duration: 15.467s, episode steps: 516, steps per second: 33, episode reward: 110.000, mean reward: 0.213 [0.000, 10.000], mean action: 3.010 [0.000, 8.000], mean observation: 72.853 [0.000, 228.000], loss: 0.791431, mean_absolute_error: 0.021996, acc: 0.982316, mean_q: 1.000000\n", " 39430/100000: episode: 85, duration: 12.932s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.770487, mean_absolute_error: 0.021347, acc: 0.983507, mean_q: 1.000000\n", " 39870/100000: episode: 86, duration: 13.094s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.912 [0.000, 228.000], loss: 0.851223, mean_absolute_error: 0.022746, acc: 0.985653, mean_q: 1.000000\n", " 40298/100000: episode: 87, duration: 12.801s, episode steps: 428, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.866558, mean_absolute_error: 0.023239, acc: 0.984959, mean_q: 1.000000\n", " 40738/100000: episode: 88, duration: 
13.130s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.913 [0.000, 228.000], loss: 0.814577, mean_absolute_error: 0.021974, acc: 0.985369, mean_q: 1.000000\n", " 41180/100000: episode: 89, duration: 13.200s, episode steps: 442, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.993 [1.000, 6.000], mean observation: 72.913 [0.000, 228.000], loss: 0.791810, mean_absolute_error: 0.021386, acc: 0.985506, mean_q: 1.000000\n", " 41702/100000: episode: 90, duration: 15.572s, episode steps: 522, steps per second: 34, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.853 [0.000, 228.000], loss: 0.927495, mean_absolute_error: 0.024034, acc: 0.987428, mean_q: 1.000000\n", " 42138/100000: episode: 91, duration: 13.054s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.918 [0.000, 228.000], loss: 0.935920, mean_absolute_error: 0.023190, acc: 0.985163, mean_q: 1.000000\n", " 42568/100000: episode: 92, duration: 12.831s, episode steps: 430, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 0.749676, mean_absolute_error: 0.020661, acc: 0.984448, mean_q: 1.000000\n", " 43039/100000: episode: 93, duration: 13.984s, episode steps: 471, steps per second: 34, episode reward: 110.000, mean reward: 0.234 [0.000, 10.000], mean action: 3.011 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.832054, mean_absolute_error: 0.022190, acc: 0.985801, mean_q: 1.000000\n", " 43473/100000: episode: 94, duration: 12.932s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.023 
[3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.718739, mean_absolute_error: 0.019606, acc: 0.986679, mean_q: 1.000000\n", " 43904/100000: episode: 95, duration: 12.903s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.790879, mean_absolute_error: 0.020783, acc: 0.988399, mean_q: 1.000000\n", " 44329/100000: episode: 96, duration: 12.874s, episode steps: 425, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.912065, mean_absolute_error: 0.022168, acc: 0.987132, mean_q: 1.000000\n", " 44763/100000: episode: 97, duration: 13.024s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [2.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.837879, mean_absolute_error: 0.020969, acc: 0.984375, mean_q: 1.000000\n", " 45290/100000: episode: 98, duration: 15.742s, episode steps: 527, steps per second: 33, episode reward: 110.000, mean reward: 0.209 [0.000, 10.000], mean action: 3.009 [3.000, 8.000], mean observation: 72.863 [0.000, 228.000], loss: 0.899594, mean_absolute_error: 0.023656, acc: 0.986717, mean_q: 1.000000\n", " 45714/100000: episode: 99, duration: 13.066s, episode steps: 424, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.913 [0.000, 228.000], loss: 0.795141, mean_absolute_error: 0.020977, acc: 0.988060, mean_q: 1.000000\n", " 46150/100000: episode: 100, duration: 13.063s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.718090, mean_absolute_error: 0.019679, acc: 0.985737, mean_q: 
1.000000\n", " 46580/100000: episode: 101, duration: 12.913s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 0.697030, mean_absolute_error: 0.019125, acc: 0.986773, mean_q: 1.000000\n", " 47013/100000: episode: 102, duration: 13.298s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.739714, mean_absolute_error: 0.019914, acc: 0.986937, mean_q: 1.000000\n", " 47448/100000: episode: 103, duration: 13.779s, episode steps: 435, steps per second: 32, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.738085, mean_absolute_error: 0.020341, acc: 0.984770, mean_q: 1.000000\n", " 47889/100000: episode: 104, duration: 13.802s, episode steps: 441, steps per second: 32, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.832484, mean_absolute_error: 0.021954, acc: 0.986820, mean_q: 1.000000\n", " 48319/100000: episode: 105, duration: 12.800s, episode steps: 430, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.798077, mean_absolute_error: 0.021110, acc: 0.987718, mean_q: 1.000000\n", " 48752/100000: episode: 106, duration: 12.868s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.888920, mean_absolute_error: 0.021588, acc: 0.987009, mean_q: 1.000000\n", " 49181/100000: episode: 107, duration: 12.792s, episode steps: 429, steps per second: 34, episode reward: 60.000, 
mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.817413, mean_absolute_error: 0.021793, acc: 0.986233, mean_q: 1.000000\n", " 49614/100000: episode: 108, duration: 12.875s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.014 [3.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.804071, mean_absolute_error: 0.021243, acc: 0.987803, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 50042/100000: episode: 109, duration: 13.319s, episode steps: 428, steps per second: 32, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.796233, mean_absolute_error: 0.021130, acc: 0.987077, mean_q: 1.000000\n", " 50472/100000: episode: 110, duration: 13.205s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 6.000], mean observation: 72.909 [0.000, 228.000], loss: 0.958833, mean_absolute_error: 0.023197, acc: 0.987064, mean_q: 1.000000\n", " 51182/100000: episode: 111, duration: 21.496s, episode steps: 710, steps per second: 33, episode reward: 780.000, mean reward: 1.099 [0.000, 400.000], mean action: 3.023 [3.000, 8.000], mean observation: 72.878 [0.000, 228.000], loss: 1.750635, mean_absolute_error: 0.022866, acc: 0.987192, mean_q: 1.000000\n", " 51616/100000: episode: 112, duration: 12.981s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 2.289796, mean_absolute_error: 0.022587, acc: 0.986247, mean_q: 1.000000\n", " 52052/100000: episode: 113, duration: 13.128s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.018 [3.000, 
7.000], mean observation: 72.909 [0.000, 228.000], loss: 6.869392, mean_absolute_error: 0.026987, acc: 0.988174, mean_q: 1.000000\n", " 52486/100000: episode: 114, duration: 12.987s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.911 [0.000, 228.000], loss: 0.908648, mean_absolute_error: 0.021901, acc: 0.987975, mean_q: 1.000000\n", " 53028/100000: episode: 115, duration: 16.227s, episode steps: 542, steps per second: 33, episode reward: 110.000, mean reward: 0.203 [0.000, 10.000], mean action: 3.015 [1.000, 8.000], mean observation: 72.861 [0.000, 228.000], loss: 0.927577, mean_absolute_error: 0.021164, acc: 0.988872, mean_q: 1.000000\n", " 53549/100000: episode: 116, duration: 15.605s, episode steps: 521, steps per second: 33, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.854 [0.000, 228.000], loss: 6.778060, mean_absolute_error: 0.024810, acc: 0.987224, mean_q: 1.000000\n", " 53976/100000: episode: 117, duration: 12.945s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.895132, mean_absolute_error: 0.023102, acc: 0.988876, mean_q: 1.000000\n", " 54417/100000: episode: 118, duration: 13.326s, episode steps: 441, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 0.815021, mean_absolute_error: 0.021397, acc: 0.987883, mean_q: 1.000000\n", " 54848/100000: episode: 119, duration: 12.849s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 6.555128, mean_absolute_error: 0.023074, acc: 0.989342, mean_q: 
1.000000\n", " 55278/100000: episode: 120, duration: 12.983s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.788291, mean_absolute_error: 0.020740, acc: 0.988517, mean_q: 1.000000\n", " 55712/100000: episode: 121, duration: 13.138s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.905 [0.000, 228.000], loss: 0.701904, mean_absolute_error: 0.018827, acc: 0.988767, mean_q: 1.000000\n", " 56144/100000: episode: 122, duration: 12.976s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.016 [3.000, 8.000], mean observation: 72.912 [0.000, 228.000], loss: 6.674213, mean_absolute_error: 0.026572, acc: 0.986400, mean_q: 1.000000\n", " 56584/100000: episode: 123, duration: 13.134s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 6.481470, mean_absolute_error: 0.024026, acc: 0.989205, mean_q: 1.000000\n", " 57010/100000: episode: 124, duration: 12.779s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 2.567588, mean_absolute_error: 0.024750, acc: 0.987089, mean_q: 1.000000\n", " 57440/100000: episode: 125, duration: 12.885s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [0.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 2.430678, mean_absolute_error: 0.023412, acc: 0.988154, mean_q: 1.000000\n", " 57878/100000: episode: 126, duration: 13.035s, episode steps: 438, steps per second: 34, episode reward: 60.000, 
mean reward: 0.137 [0.000, 10.000], mean action: 3.009 [3.000, 5.000], mean observation: 72.909 [0.000, 228.000], loss: 2.234050, mean_absolute_error: 0.021098, acc: 0.989155, mean_q: 1.000000\n", " 58316/100000: episode: 127, duration: 13.131s, episode steps: 438, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 1.070799, mean_absolute_error: 0.022651, acc: 0.986658, mean_q: 1.000000\n", " 58750/100000: episode: 128, duration: 13.015s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.919 [0.000, 228.000], loss: 3.657577, mean_absolute_error: 0.023798, acc: 0.988119, mean_q: 1.000000\n", " 59176/100000: episode: 129, duration: 12.747s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 2.296508, mean_absolute_error: 0.023142, acc: 0.989583, mean_q: 1.000000\n", " 59653/100000: episode: 130, duration: 14.280s, episode steps: 477, steps per second: 33, episode reward: 110.000, mean reward: 0.231 [0.000, 10.000], mean action: 3.010 [3.000, 8.000], mean observation: 72.867 [0.000, 228.000], loss: 0.889277, mean_absolute_error: 0.021522, acc: 0.988273, mean_q: 1.000000\n", " 60084/100000: episode: 131, duration: 12.940s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.917 [0.000, 228.000], loss: 0.812250, mean_absolute_error: 0.021473, acc: 0.987602, mean_q: 1.000000\n", " 60510/100000: episode: 132, duration: 12.855s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 
6.668528, mean_absolute_error: 0.024226, acc: 0.987969, mean_q: 1.000000\n", " 60951/100000: episode: 133, duration: 13.184s, episode steps: 441, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.011 [3.000, 6.000], mean observation: 72.905 [0.000, 228.000], loss: 6.516801, mean_absolute_error: 0.023675, acc: 0.988450, mean_q: 1.000000\n", " 61377/100000: episode: 134, duration: 12.783s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.774365, mean_absolute_error: 0.020130, acc: 0.990023, mean_q: 1.000000\n", " 61807/100000: episode: 135, duration: 12.900s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 6.000], mean observation: 72.918 [0.000, 228.000], loss: 0.730819, mean_absolute_error: 0.019347, acc: 0.988808, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 62241/100000: episode: 136, duration: 12.967s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.739101, mean_absolute_error: 0.019514, acc: 0.989127, mean_q: 1.000000\n", " 62688/100000: episode: 137, duration: 13.364s, episode steps: 447, steps per second: 33, episode reward: 60.000, mean reward: 0.134 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.831790, mean_absolute_error: 0.021591, acc: 0.988465, mean_q: 1.000000\n", " 63121/100000: episode: 138, duration: 13.000s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 7.999557, mean_absolute_error: 0.025455, acc: 0.988380, mean_q: 
1.000000\n", " 63546/100000: episode: 139, duration: 13.515s, episode steps: 425, steps per second: 31, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.910 [0.000, 228.000], loss: 6.665014, mean_absolute_error: 0.023986, acc: 0.988235, mean_q: 1.000000\n", " 63987/100000: episode: 140, duration: 13.334s, episode steps: 441, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.847174, mean_absolute_error: 0.021910, acc: 0.988662, mean_q: 1.000000\n", " 64433/100000: episode: 141, duration: 13.484s, episode steps: 446, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.004 [3.000, 5.000], mean observation: 72.907 [0.000, 228.000], loss: 0.801544, mean_absolute_error: 0.021092, acc: 0.988299, mean_q: 1.000000\n", " 64862/100000: episode: 142, duration: 12.891s, episode steps: 429, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 6.596956, mean_absolute_error: 0.023347, acc: 0.989948, mean_q: 1.000000\n", " 65301/100000: episode: 143, duration: 13.241s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 0.756605, mean_absolute_error: 0.020061, acc: 0.988468, mean_q: 1.000000\n", " 65740/100000: episode: 144, duration: 13.186s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.014 [3.000, 8.000], mean observation: 72.913 [0.000, 228.000], loss: 0.754340, mean_absolute_error: 0.019724, acc: 0.990248, mean_q: 1.000000\n", " 66175/100000: episode: 145, duration: 13.118s, episode steps: 435, steps per second: 33, episode reward: 60.000, 
mean reward: 0.138 [0.000, 10.000], mean action: 2.998 [0.000, 5.000], mean observation: 72.912 [0.000, 228.000], loss: 13.629837, mean_absolute_error: 0.026191, acc: 0.991738, mean_q: 1.000000\n", " 66613/100000: episode: 146, duration: 13.218s, episode steps: 438, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.728253, mean_absolute_error: 0.019346, acc: 0.988870, mean_q: 1.000000\n", " 67050/100000: episode: 147, duration: 13.268s, episode steps: 437, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 0.771391, mean_absolute_error: 0.020079, acc: 0.990132, mean_q: 1.000000\n", " 67487/100000: episode: 148, duration: 13.208s, episode steps: 437, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 1.055708, mean_absolute_error: 0.023504, acc: 0.988630, mean_q: 1.000000\n", " 67917/100000: episode: 149, duration: 13.018s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.904 [0.000, 228.000], loss: 0.734914, mean_absolute_error: 0.019412, acc: 0.989390, mean_q: 1.000000\n", " 68349/100000: episode: 150, duration: 13.049s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.879950, mean_absolute_error: 0.020989, acc: 0.988860, mean_q: 1.000000\n", " 68776/100000: episode: 151, duration: 12.993s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.002 [2.000, 5.000], mean observation: 72.910 [0.000, 228.000], loss: 
0.810485, mean_absolute_error: 0.021195, acc: 0.988583, mean_q: 1.000000\n", " 69200/100000: episode: 152, duration: 13.099s, episode steps: 424, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 6.769201, mean_absolute_error: 0.024035, acc: 0.989608, mean_q: 1.000000\n", " 69634/100000: episode: 153, duration: 13.483s, episode steps: 434, steps per second: 32, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 6.727956, mean_absolute_error: 0.025991, acc: 0.990351, mean_q: 1.000000\n", " 70065/100000: episode: 154, duration: 13.497s, episode steps: 431, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.831990, mean_absolute_error: 0.019917, acc: 0.988907, mean_q: 1.000000\n", " 70500/100000: episode: 155, duration: 13.260s, episode steps: 435, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.903836, mean_absolute_error: 0.021813, acc: 0.987644, mean_q: 1.000000\n", " 70924/100000: episode: 156, duration: 12.851s, episode steps: 424, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.813252, mean_absolute_error: 0.020972, acc: 0.990050, mean_q: 1.000000\n", " 71351/100000: episode: 157, duration: 12.881s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.762409, mean_absolute_error: 0.019593, acc: 0.991437, mean_q: 1.000000\n", " 71898/100000: episode: 158, duration: 16.541s, 
episode steps: 547, steps per second: 33, episode reward: 60.000, mean reward: 0.110 [0.000, 10.000], mean action: 3.004 [3.000, 5.000], mean observation: 72.901 [0.000, 228.000], loss: 5.462224, mean_absolute_error: 0.022918, acc: 0.989431, mean_q: 1.000000\n", " 72325/100000: episode: 159, duration: 12.927s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.913 [0.000, 228.000], loss: 0.815619, mean_absolute_error: 0.020754, acc: 0.991584, mean_q: 1.000000\n", " 72749/100000: episode: 160, duration: 12.806s, episode steps: 424, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.805345, mean_absolute_error: 0.020817, acc: 0.989829, mean_q: 1.000000\n", " 73176/100000: episode: 161, duration: 12.953s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 6.730695, mean_absolute_error: 0.024181, acc: 0.989095, mean_q: 1.000000\n", " 73615/100000: episode: 162, duration: 13.336s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.837817, mean_absolute_error: 0.020025, acc: 0.989749, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 74051/100000: episode: 163, duration: 13.276s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 6.441117, mean_absolute_error: 0.021877, acc: 0.989822, mean_q: 1.000000\n", " 74488/100000: episode: 164, duration: 13.053s, episode steps: 437, steps per second: 33, episode reward: 60.000, 
mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.819825, mean_absolute_error: 0.021008, acc: 0.990346, mean_q: 1.000000\n", " 74921/100000: episode: 165, duration: 13.294s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 2.998 [0.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.865574, mean_absolute_error: 0.022025, acc: 0.990257, mean_q: 1.000000\n", " 75354/100000: episode: 166, duration: 13.565s, episode steps: 433, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.708961, mean_absolute_error: 0.018218, acc: 0.992350, mean_q: 1.000000\n", " 75781/100000: episode: 167, duration: 12.828s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 8.054317, mean_absolute_error: 0.024153, acc: 0.989681, mean_q: 1.000000\n", " 76295/100000: episode: 168, duration: 15.366s, episode steps: 514, steps per second: 33, episode reward: 110.000, mean reward: 0.214 [0.000, 10.000], mean action: 3.010 [3.000, 8.000], mean observation: 72.859 [0.000, 228.000], loss: 5.656568, mean_absolute_error: 0.023127, acc: 0.990333, mean_q: 1.000000\n", " 76710/100000: episode: 169, duration: 12.637s, episode steps: 415, steps per second: 33, episode reward: 60.000, mean reward: 0.145 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.841335, mean_absolute_error: 0.019893, acc: 0.990361, mean_q: 1.000000\n", " 77142/100000: episode: 170, duration: 13.082s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 
0.767345, mean_absolute_error: 0.017985, acc: 0.991753, mean_q: 1.000000\n", " 77592/100000: episode: 171, duration: 13.450s, episode steps: 450, steps per second: 33, episode reward: 110.000, mean reward: 0.244 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.899 [0.000, 228.000], loss: 0.689204, mean_absolute_error: 0.018016, acc: 0.991042, mean_q: 1.000000\n", " 78114/100000: episode: 172, duration: 15.622s, episode steps: 522, steps per second: 33, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.850 [0.000, 228.000], loss: 0.714887, mean_absolute_error: 0.018603, acc: 0.990960, mean_q: 1.000000\n", " 78544/100000: episode: 173, duration: 12.919s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.737498, mean_absolute_error: 0.019308, acc: 0.989971, mean_q: 1.000000\n", " 78975/100000: episode: 174, duration: 12.894s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.918 [0.000, 228.000], loss: 0.797951, mean_absolute_error: 0.018914, acc: 0.990502, mean_q: 1.000000\n", " 79417/100000: episode: 175, duration: 13.310s, episode steps: 442, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 12.094293, mean_absolute_error: 0.026394, acc: 0.991233, mean_q: 1.000000\n", " 79849/100000: episode: 176, duration: 12.968s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.907 [0.000, 228.000], loss: 0.746510, mean_absolute_error: 0.019266, acc: 0.991247, mean_q: 1.000000\n", " 80281/100000: episode: 177, duration: 
13.057s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 0.878578, mean_absolute_error: 0.020813, acc: 0.990162, mean_q: 1.000000\n", " 80717/100000: episode: 178, duration: 12.996s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.747418, mean_absolute_error: 0.017835, acc: 0.990396, mean_q: 1.000000\n", " 81150/100000: episode: 179, duration: 13.030s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 2.231029, mean_absolute_error: 0.021677, acc: 0.991700, mean_q: 1.000000\n", " 81577/100000: episode: 180, duration: 12.772s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.783079, mean_absolute_error: 0.020112, acc: 0.990925, mean_q: 1.000000\n", " 82008/100000: episode: 181, duration: 12.885s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.696581, mean_absolute_error: 0.018047, acc: 0.991734, mean_q: 1.000000\n", " 82440/100000: episode: 182, duration: 13.183s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.836997, mean_absolute_error: 0.019772, acc: 0.991030, mean_q: 1.000000\n", " 82871/100000: episode: 183, duration: 13.619s, episode steps: 431, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 
3.000 [3.000, 3.000], mean observation: 72.916 [0.000, 228.000], loss: 13.865776, mean_absolute_error: 0.028900, acc: 0.991372, mean_q: 1.000000\n", " 83293/100000: episode: 184, duration: 12.943s, episode steps: 422, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 2.995 [0.000, 4.000], mean observation: 72.904 [0.000, 228.000], loss: 0.801700, mean_absolute_error: 0.020705, acc: 0.989929, mean_q: 1.000000\n", " 83729/100000: episode: 185, duration: 13.499s, episode steps: 436, steps per second: 32, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 6.508937, mean_absolute_error: 0.023269, acc: 0.990181, mean_q: 1.000000\n", " 84173/100000: episode: 186, duration: 13.531s, episode steps: 444, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.848594, mean_absolute_error: 0.019877, acc: 0.991695, mean_q: 1.000000\n", " 84615/100000: episode: 187, duration: 13.393s, episode steps: 442, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.853347, mean_absolute_error: 0.021670, acc: 0.990950, mean_q: 1.000000\n", " 85049/100000: episode: 188, duration: 13.252s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 6.567101, mean_absolute_error: 0.023859, acc: 0.991215, mean_q: 1.000000\n", " 85488/100000: episode: 189, duration: 13.414s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 12.108674, mean_absolute_error: 0.024965, acc: 
0.991315, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 85917/100000: episode: 190, duration: 13.049s, episode steps: 429, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.760346, mean_absolute_error: 0.019699, acc: 0.989948, mean_q: 1.000000\n", " 86352/100000: episode: 191, duration: 13.272s, episode steps: 435, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.912 [0.000, 228.000], loss: 0.691576, mean_absolute_error: 0.017892, acc: 0.991882, mean_q: 1.000000\n", " 86777/100000: episode: 192, duration: 12.884s, episode steps: 425, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.688200, mean_absolute_error: 0.018250, acc: 0.989853, mean_q: 1.000000\n", " 87199/100000: episode: 193, duration: 12.817s, episode steps: 422, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.798901, mean_absolute_error: 0.020241, acc: 0.992595, mean_q: 1.000000\n", " 87631/100000: episode: 194, duration: 13.218s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.758069, mean_absolute_error: 0.017800, acc: 0.991536, mean_q: 1.000000\n", " 88076/100000: episode: 195, duration: 13.552s, episode steps: 445, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.907 [0.000, 228.000], loss: 0.755982, mean_absolute_error: 0.019846, acc: 0.989045, mean_q: 1.000000\n", " 88513/100000: episode: 196, 
duration: 13.312s, episode steps: 437, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.821040, mean_absolute_error: 0.020977, acc: 0.990775, mean_q: 1.000000\n", " 88975/100000: episode: 197, duration: 14.064s, episode steps: 462, steps per second: 33, episode reward: 110.000, mean reward: 0.238 [0.000, 10.000], mean action: 3.000 [2.000, 4.000], mean observation: 72.892 [0.000, 228.000], loss: 0.816265, mean_absolute_error: 0.019451, acc: 0.990530, mean_q: 1.000000\n", " 89401/100000: episode: 198, duration: 12.976s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.912 [0.000, 228.000], loss: 0.783816, mean_absolute_error: 0.020237, acc: 0.990244, mean_q: 1.000000\n", " 89827/100000: episode: 199, duration: 12.980s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 3.702346, mean_absolute_error: 0.023155, acc: 0.990390, mean_q: 1.000000\n", " 90270/100000: episode: 200, duration: 13.735s, episode steps: 443, steps per second: 32, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.020 [3.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 6.430346, mean_absolute_error: 0.023296, acc: 0.991112, mean_q: 1.000000\n", " 90691/100000: episode: 201, duration: 12.984s, episode steps: 421, steps per second: 32, episode reward: 60.000, mean reward: 0.143 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 6.711028, mean_absolute_error: 0.023351, acc: 0.990202, mean_q: 1.000000\n", " 91130/100000: episode: 202, duration: 14.014s, episode steps: 439, steps per second: 31, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean 
action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.857046, mean_absolute_error: 0.020169, acc: 0.991173, mean_q: 1.000000\n", " 91578/100000: episode: 203, duration: 13.709s, episode steps: 448, steps per second: 33, episode reward: 60.000, mean reward: 0.134 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 7.718023, mean_absolute_error: 0.023832, acc: 0.991211, mean_q: 1.000000\n", " 92011/100000: episode: 204, duration: 13.224s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.776836, mean_absolute_error: 0.018400, acc: 0.990834, mean_q: 1.000000\n", "done, took 2826.234 seconds\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Visualize the training during 100000 steps \n", "## Each episode is a game in which Pacman has two lives \n", "### When nb_steps is not long enough, a warning might appear \n", "### \"Not enough entries to sample without replacement. 
Consider increasing your warm-up phase to avoid oversampling!\"\n", "\n", "dqn.fit(env, nb_steps=100000, visualize=True, verbose=2)\n" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJztnXeYFFX2/t87MAGGkZxHEXElmR0DYkBFwYSi7ndxWRO6IJiXXVfMyoq6rIoRhAXElVURFSMqKEYQRQSJgwiSRYJIlEnn98fb91fV1dU9HYqe6dnzeZ55uivf7pl569S57z3XiAgURVGUzCerqhugKIqiBIMKuqIoSg1BBV1RFKWGoIKuKIpSQ1BBVxRFqSGooCuKotQQKhV0Y8w4Y8zPxpiFrnXDjTFLjTHfGWNeN8Y02LfNVBRFUSojngj9OQA9PeumAThURA4HsAzAkIDbpSiKoiRIpYIuIp8C2OpZ94GIlIUWvwRQuA/apiiKoiRA7QDO0Q/Ay9E2GmP6A+gPAPn5+cd06NAhgEsqiqJUY/buBRaGstTHHJPy6b755pvNItK0sv1MPEP/jTEHAnhbRA71rL8DQBGAiySOExUVFcmcOXMqvZ6iKEpGU1wM2OA1gPIqxphvRKSosv2SjtCNMVcCOA/AGfGIuaIoyv8MjRsDJ5wAtGuX1ssmJejGmJ4AbgVwqojsDrZJiqIoGU6TJsCsWWm/bDy2xRcBzALQ3hiz1hhzNYCnABQAmGaMmWeMGbWP26koipI5lJQAS5YAP/yQ1stWGqGLyKU+q8fug7YoiqLUDBYtAo4+mu/TmJHWkaKKoihBU1FRJZdVQVcURQma8vIquawKuqIoStC4I3RNuSiKomQwbkFPY/oliJGiiqIoips2bYDf/x5o2TKtl1VBVxRFCZrWrYFJk9J+WRV0RVGUoNm1C5g/H8jLA448EshKT3ZbBV1RFCVoZs0CzjyT77dvBwoK0nJZ7RRVFEUJGrdtMY0WRhV0RVGUoKkil4sKuqIoStC4o3IVdEVRlAymilIu2imqKIoSNEceCdx8M9CsWdo6RAEVdEVRlOBp0wZ47LG0X1YFXVEUJWi2bAG++gqoVw847jggNzctl1VBVxRFCZpp04BLQ1NJrFgBtG2blstqp6iiKErQqG1RURSlhqADixRFUWoIGqEriqLUENSHriiKUkPo1g14+GGgcWOW0k0TKuiKoihBc9BBwK23pv2yKuiKoihBs3YtMHMmUL8+cOKJWj5XURQlY5kyBfjDH4CePYHi4rRdVgVdURQlaNTloiiKUkNQH7qiKEoNobpG6MaYccaYn40xC13rGhljphljvg+9Nty3zVQURckgqnGE/hyAnp51twH4UER+B+DD0LKiKIoCABdfDDz3HDtHO3dO22UrtS2KyKfGmAM9qy8A0C30fgKAjwH8PcB2KYqiZC7t2vEnzSTrQ28uIhtC738C0Dyg9iiKomQ+xcX0oTdsSB96s2ZpuWzKnaIiIgAk2nZjTH9jzBxjzJxNmzalejlFUZTqz0svAf36Ab17A3Pnpu2yyQr6RmNMSwAIvf4cbUcRGS0iRSJS1LRp0yQvpyiKkkFUV5dLFN4EcEXo/RUA3gimOYqiKDUAt7OlOgm6MeZFALMAtDfGrDXGXA3gIQBnGmO+B9A9tKwoiqIA4SJencrnisilUTadEXBbFEVRagZVFKFrtUVFUZSgGTCANdHr1q1ePnRFURQlQQ4
6iD9pRgVdURQlaL7+GvjuOyAvD+jaFTjwwLRcVgVdURQlaJ5/HnjqKb5/4YW0CbpWW1QURQmaDPOhK4qiKNGoItuiCrqiKErQVNeBRYqiKEqCVNeBRYqiKEqC3H03MGgQ3x9wQNouq4KuKIoSNAcckFYht6igK4qiBM306cCaNUBJCeuhH3ZYWi6rgq4oihI048YBH3wAbNkCjBiRNkHXTlFFUZSgKS8HsrP5Xl0uiqIoGUxFhQq6oihKjcAdoevAIkVRlAymiiJ07RRVFEUJmpEj6XApKQEaN07bZVXQFUVRgqZlyyq5rAq6oihK0EyaxOj8p5+ALl1YEz0NqKAriqIEzb//DezcCXz5JXDnnWkTdO0UVRRFCZqKCiAriz/qclEURclgyssp5rVqqQ9dURQlo6mooJhrhK4oipLh2JRLmiN07RRVFEUJmjfeAETodKlXL22XVUFXFEUJmkaNquSyKuiKoihBM2oUI/N164DOnYHzzkvLZVXQFUVRgmbMGKBVK2D2bOCSS9Im6Cl1ihpjbjHGLDLGLDTGvGiMyQuqYYqiKBmLtS1mZWWGbdEY0xrAjQCKRORQALUA9AmqYYqiKBlLhtoWawOoY4ypDaAugPWpN0lRFKUaMG8ecNttdKskSqYNLBKRdQD+BWA1gA0AfhWRD7z7GWP6G2PmGGPmbNq0KfmWKoqipJNzzwUefpjzgiaKO0LPBB+6MaYhgAsAtAWwDcArxpg/icgL7v1EZDSA0QBQVFSUxK1OURSlCigsBNavBzZtApo0SezYuXMBYxip106f9ySVlEt3ACtFZJOIlAJ4DcCJwTRLURSlinnoIb5u3Jj4sXXqAHl5QH4+kJsbbLtikMqtYzWAE4wxdQHsAXAGgDmBtEpRFKWqadMGOPtsinOi3H8/cPDBwOrVtC9efnnw7fMhlRz6bACTAcwFsCB0rtEBtUtRFKVqee89RtnHH5/4sWPHAtOmARMmAO+8E3zbopBSckdE7gFwT0BtURRFqT4sXgx8+mlyx7p96BlkW1QURamZlJXR4TJgQOLHVlG1RRV0RVEUP0pL+bpqVeLHZujAIkVRlJpJWRlfk3G5AE6EnszApCTR4lyKoih+tG7N159+SvxYe4wI/ehpQgVdURTFj2HDGGEPG8a0Sa1aiZ8jjWIOqKAriqJEp0sXoF8/YO9eoG7d+I+77jrg1FOBFSuA7Gxg8OB910YXKuiKoih+DB4MbN0KjB+f+LHPPccBSV9+yZGiKuiKoihVyOLFFPSKCv4kUpPFulzUtqgoilINKC0F1qwBcnKA//43sWN1YJGiKEo1oqwMaNmSgpyo06WKInRNuSiKovhRVgY0bMiKiYkKekEBc+j5+WpbVBRFqXI6d6Yor1yZuKD/8su+aVMlqKAriqL48eyzfP366+QGF1UBKuiKoiixuPzyxPLgZWVA377ApZcCy5cDmzc7k2XsY1TQFUVR/DjvPKB9e+CRRxI7rqwMmDQJOOoo+tB//DFtgq4uF0VRgqNrVwqhl9JS4OqrOXIyU1i2DNiwgQK9YUP89kO7n9oWFUXJaGbO9J+hZ9YsYNw44Mor096kpCkt5WCi8eM5jdz69fEdZ9MzqdgWv/qKU9jNmpXQYSroiqLsew45hK+XXhrf/qedxunbguKeezgtXCKUlbEOS4sWXI63Y9QKuI3QkxH0HTuAH35wSvjGiebQFUXZ9+Tk8LWkJL79P/6YP1dcEcz177+fr61bAz17xndMWRkj9Pr1ubx9e+Q+69YxNXPaac46EaCwkF70xo2Bpk0Tb2/Xrsy9N2+e0GEq6Iqi7HvWrePrnDnOunfeAVavBgYODN/XTghx223BXb+ggFFvnTrxH3PqqcChhzo3IzuDkZtBg4A33wyfxKJBA5YMSIW8PKBNm4QP05SLoijBYgXQza+/8tVOGgGw83TQoMh9bZohPz+4NmVnA9dfT5GOl5deAm64gccC/k8XnTr5f95UWbyYddg3bUr
oMBV0RVGCQYTR9uTJ/tsA4MwzKz+PjYQTLYgVi5tuAs46C5gxI/FyuIWFtB127Bi5rVYtCr07Qt+6FTjnHGDqVGDEiOQ6gufPB+64g5NUJ4CmXBRFCQZjgCee8O/Is9GtO21x8sn+EWhuLl/r1QuubXffzdc//5mpnquuqvyYdu2AAQOAW28F/v53/30eeICv5eVOed3duynmvXsDCxYA06cn3l77PdmngzjRCF1RlGAQYTTaoUPkNitQDz7orLvqKqZBvNSqBRQVJdeZGI3Nm4HffqNA+uXC/Vi5knn3sjI6TmzayA93OiYI26I9X4LpHBV0RVGCYeNGYOJEYNWqyG224mCWS3KaNvW3Au7axc7TBQuCaVd5Oa/1z39SIONx2lRU8AaVnc1BRQcf7J9KatmSr+75RoMYWKQRuqIoVcrevdG39egBNGrECoaWKVOAf/wjct/Nm/maqlPEYsUxJyf+CN2mjWrXjt0petxxwOGHO2kiIJgIXQVdUZQqJZagAxRUt5hGG+gTb0okXqwQZ2dHtiEadh+3oPsdt307b0DufoPatZl2atCAg5LatUu8zQMG8LwNGyZ0WEqCboxpYIyZbIxZaoxZYozpksr5FEXJYGIJ+owZTK+sXRu5ze0QARzhfOmlYNrljnYHDwa+/77yY4wBfv97CnOsQVG//caSANZnD9A/vmQJcP75wF13sUBXouTmclBSVmISnWqE/jiA90SkA4AjACxJ8XyKomQqVtCPPjpymy3Kde65kdu8ka87og4Cdwdjo0bAgQdWfkzduqyY2KtX7Ajd+uiDfqqYNg0YMiTyZlcJSQu6MaY+gFMAjAUAESkRkW3Jnk9RlAynWTPgb39jx6gXK6oXXRR9m8WK45AhwbSrXj0O0jn+eE5WMWxY5ekhNzk5wFNP0cfuxZ7H/Rl++IGWzE8+AUaOjM977+XTT4GHH054+rpUIvS2ADYBGG+M+dYY829jTMTQLmNMf2PMHGPMnE0JjnpSFCWDOOAA4L77mPeNlkbZutVZ17Ila7V4/eadOgEnnUTbYBAUFPDmcNRRrAZ5xx30isdi3TrmwP/zH6Y9rrsOOOaYyP2uuYav7gh9xw7g88/5WX/8Efjss8TbXFKS1AjUVAS9NoCjAYwUkaMA7AIQUXxBREaLSJGIFDUN0leqKEr1oqSERbBatAD27AnfZgXPXbdlyBDmqb3UrQsce2y4cyTVdq1cSRGPt0hYSQl959ZyuGCBf/7fvb8lKJdLEimnVAR9LYC1IjI7tDwZFHhFUf4XmTrVmZnHm1OuWzdy/6Ii4K23wqN2gAW7xowBdu4Mpl1LlwIHHcT2xcqHu7GuFbt/167+MxfZm467Rk0Q5XPTLegi8hOANcaY9qFVZwBYnOz5FEXJcNx5aa9gDhzIvLLbtfHBB5yIeZun623RIkfMg5jtx90pGm+E7rYtAtH964WFwB//yAkwLEENLEpzhA4ANwCYaIz5DsCRAIaleD5FUTIVt6D7CabXA37vvXz1CqV7Od766bFw2xZjDRJy4x5YBET3r+/YwTy5+2nCpowaNqTgFxUl7FbBM88kNbAqpeJcIjIPQFEq51AUpYbgFkmv+I0ZA3z4IYUOCBc4r7ja5e++S6x+eTTcI0V79+aAnQYNYh/ToAHQrx/Qti2Xs7P9bwKnn06//KefssIiABx2GKeQA4AuXYD+/RNvc1ZW2jtFFUVRHGyEfsstziw/liWhISqDB/PVnYaIFqEH7UPPzubEEY0bh9de8eOAAziS1Xrqo6Vcbr01/BpBMWYMMHRowoepoCuKEgzHHkvb4gMPREbAJSUc1POHPzjLFm+O2QrnZZfFP49nLA45hGV9Dz6Yo0SHDPEvIBaLRx6JnFkJcMr/usX+66+BI48EvvkGeO452iV/+y2x602dyoFNCaKCrihKMBx7LPDXv3IovFfASkvpZlm6lMtW0B991EnDWC66iDeFOXOAX35JvV0HHMCZh1q2pIPmoYc
qF/SPP2b+/JNPuHzhhXS6eOnRg6/uG9T27ZygYvduCv68eYl3jFZRp6iiKAr55RdgwgRGwgsXhm+zgnf22XzNywOefDJ8cmVLvXrO7ECJjOiM1a4FC3iuWPODuikrowjb1Mz8+czpR8N9Pq/Lxb0uXlTQFUWpUoYPd2qbeHPKtm64XZ+XR3H/178Ywbr56ivgscf4PtFUhR9Tp7LE7Y8/Ju9yufZaPn24sf7yli3Z+eldbwcWudfFiwq6oihVSiwf+rBhzEHb9Xv30hkycWKkPe+zz5zh8kFE6O5O1ngjdK8P3c+2aJdvuAFo395ZH0SEnpWVlMNH5xRVFCUYYgk6EO4UWbWKtkAgum2xceOEi1P54p3gwu+aXrwjRbOzI8sZ2HMsXMjO2xYtuNyoEe2M9esD++/PtFJlrhov06Yltn8IFXRFUYIhlqD/5S90muTnR26PFvlu3Ji4EPrhti127uzMLRqLgw4CbrzRmdc0O5udnW5ycmgtvOsu4IgjHAtjly703AOM3Hv3Tv0zxImmXBRFCYa9e5kmeOopVkx0s3AhxXnkSC67I2S/8rlZWcGIuT0fQFHOymL9lcomjjjiCODxx50h/X5zkebmOiIedD30225jp3GCqKArihIMffsCTz/NUrNt2oRvKy0FTjyR3nK7bKntSRSUlLAT8ZxzWO8lVc48Exg3ju6ZX39lzvvzz2MfU1rKFIsd0Xr77cCIEZHtXL7c2d/y7rucdm7ZMuCVV+j6Wb8+sTZPmVJ5G31QQVcUJRh69GChqvnzgS1bwreVltKT/cUXFGsb7X7wAY9xc++9PMfUqXSmpEqnTsBVVzHK/u03PkHEsiACwPPPsyaLLZl7/PFAt27h+6xd60x67Y7ed+zgDE1lZXz/ww+JR/DqclEUpUpZsYKCfeSRwNtvh28rLeWgopNOoqj+7nccRWkF0U2dOk452iBsi6tWAbNm8X2i5XPt08OCBcD06eH7RKtdo7ZFRVEynj//2XGueAWzc2fHsVJaCjRvzjIAgwYBr70Wvu9rrzk+9CBsi6NHs3QvkHz53Mcf5+xKfvv8/vfhTxlB2BarYMYiRVEUh717nenkvII+bhxdLgDFassWTgf3xhtAcXH4vu+8w8JYQDARujvaTXaCC79OUbv8pz+xXosliAi9YcPIAmdxoLZFRVGCIZagA+FiOmsWcMklXPYTyjp1OPy/sjK38eCOdrOzGXVXVp/cm3Lxq7Zol+fOZQrJlisoLAQuuIAWzcJCoFcv/xmbYmGrUyaICrqiKMGwdy/QrBnfe8XvrLM4E5HdVpkPPTsbWBzQBGjuCD0rK74OyhNO4GTSdoo5v5GibdsynTNwIOcsnTCB608/nT8AU0unnBLM54gDTbkoihIMe/cyVTBhglOEy7J4MSPYV18FmjSJ7UNPMn8cldLSxM930knAP/4RnqrxtrN5c/YbHHRQ8D703r2BF19M+DAVdEVRgmHYMHrQL788cmBRaSnTEhddxFSEFceGDSPTKjaivuQSnjNVBg4E/v1vZ/m664D//Cf2MTt2OLXOAeDqqyM98b/8wtrnpaXhYj9hAsV+40ZaL5s3j6w+GYuKCvrQly2L/5gQmnJRFCUYLr6YrzNnsgKhnb4NoOBt2wa8/z7TGTaiXbKEgudmyhS6Qjp2DGYKuqOOCu+0fPllvtpBTn48+CArQVqhbteOP24++4y5coDVHC27dgE//0xXT2kp3ycyo1EKMzZphK4oSjB8+SU936efDowaFb6ttBT49lugZ09Gnj16AJMn+3d61qrFFElubjAul7lzeZOx+DlWvJSWho9gLS4G/vtf/9x/bq7/DEzJ2hbtuZJIO2mErihKMHTvDgwY4O8IOe00RtvFxdx20EH86dsXOPRQTgtneeQRimReXjCCPnQoBz3Nn89lvw5OL2Vl4YI+dSrnSj3nHOcmZIX3ySfDI/RUbYspROgq6IqiBMPevRRiP0F/6y1gxgzWNikp4dyeK1Y
wqvfyyiv0YOfmBlcP3S2Ofh2cXsrKwo/xG5BkP+Ppp4enY1KN0EV4vkaN4j8mhAq6oiipU1FBEczNjR4BuyeX+O9/WbOlffvotsXDDw+ufK47fdG0aeW+cG/KxW9Akn0/bx6wbp1jT+zQgU8eOTksYdC3L2u7x0vjxk7RrwRRQVcUJXVsJG0jdHcku2cPcMgh9KID3FZSQrHOzY0UdCvAzz4bTNu8Ebo7nx6N3r2ZCrL4RejduvHG9PTTdMV8/TXXn3MOfwCe44UXUmp+IminqKIoqWOFLjcXGDOGJWrd29au5ew9778PHHec4w3366BMsjBVzLYler4ePYDrr3eW/SL0du2ASy9leigRF0tlrF4NnHEGU1QJooKuKErq1KlD18p559HJ4rYJWrFr0oRRetOmjsh26gQceGD4uWrVYofo7bcD556bettGjAAeeshZvvNO4O67Yx+zYUP4XKc9ejACP+AAZ92PPwIffRQ5+vShh5jSKS1l9cm6dRMT5+3bed7Nm+M/JoSmXBRFSZ2cHMeH/vnnjNSPPZbLVuxKSij6RUVOhG6Hy7uxJQKuvDKxATnRsO2w2Jrssbj5ZtZMtzVVGjeOzIO/+CJvOhdfHC7oJSVMM9n8/549Tm2YeEjBtphyhG6MqWWM+dYY83bleyuKUiPZuRN47z1OlnzjjcD99zvbrEBt28ZSsx9/zJTMK6/EPmdeXjAul6lTw9008bhcvJ2ia9eybsvGjc46ew73yFfAuVkYk5zLpYoHFt0EILnSYIqi1AxWrWL9lk8/jbQt5uVxGH/79lwuKaETpFs3Th79pz+Fn2vgQHYkBuVDv+kmp3QvEN/AIq9tcelSeuy//95ZV1rKKHzIEGf0KUDxNoY/NkpPk6CnlHIxxhQCOBfAAwD+ksq5FEXJYNwuF69gtmjBaPznn7lcWkqniZ2qbeXK8HNNnEgxD0rQvZ2iyQws8usUteft0CH82IoKR8hthJ7IwKK8PE5SnUTp4FRz6CMA3AqgINoOxpj+APoDwAHuDgVFUWoOXttiZfXQH3uMFRg7d/b3oefkMKLv1o0DbexsR8ngdc20bMmbSWXHuAU92sCinByWNFi2jDMwAXTxDBjA982a8X0i2ldURG97EiSdcjHGnAfgZxH5JtZ+IjJaRIpEpKhp06bJXk5RlOpMLEH/5htWVfz4Yy7b6oTZ2f75bLvtqquYl09FzO353B2MTz8NTJsW+5gbbwT++ldn2d4Q3G3985+BSZPYOXrVVc76Cy7gRNQAhXzUKEbcaSCVHHpXAL2MMT8CeAnA6caY9DnoFUWpPrgF/aGHHEEDmDbZto3Wxi++4MhJK7Je8S8vZ3oiSB96Mr72889nqV+Le5SrpVMn2hm9N6XycidnLuJ8pnj56CM6c9z5+jhJWtBFZIiIFIrIgQD6APhIRP5UyWGKotREjjmG0XSnTvSgFxU529xVCU88EWjVyklXdOwYvm9ZGf3qBQUchdmmTXhd8mSYPp02RMsTTzgWy2gUFzO/bznkEFoYzzzTWTdnDs+dkxMu2n/9q1OHZelSpm4qc/S42bSJ505i0gz1oSuKkjpNmjBaBWgR3LyZg4wAR5hycoDnn6eI27TK3/8efp7cXEfAx43jqMk9e1Jrm/uGATDyrWygzx//yFz72yE3dl5eZOfnY48Bs2cD/fpxubSU7S8vdzpDk+kUrep66CLysYicF8S5FEXJQFau5KChXbuYbrnpJmebTUdkZwPXXsu887PPhqdl/MjL42tlTpft2+mD90OEsxW5Oxmjddq68doWd+wAHn2Ug40s7rQR4JzTz+WSQT50RVH+1/noIw4a2rIl0rbYujVHfTZr5ohpx450uDzySHgt8W3bgAsvZM0XO0FzZYOL6teP7iIpLWXn5TvvOOuSsS3u3AkMHhxe2Mvm5i+7jJNo2NmV3BG61kNXFCXjsKLr19F55JHA+PHO9tJSThbdqBHn5Vy82Nl3507gjTdYw6VVK66
Lx4v+yy/+6/3EMZmRon6dorYfoEUL/lhS9aE3awZ07ZrU9Hsq6IqipI672mKslIYV0zvuoNB37Oh0KLqLXNla4hddxA7SZHGfz9KqFZ8Kysuj11v3plz8bIu2H6C4mE8of/oT23rWWUBhIfepX5+dpJ07x9/mCy/kTxKooCuKkjqxfOjPPce0x/LlzjabrnBHvu65ObOzKfivvlr5tVu2BPr08d/mPp9l4ED+xOLRR1kV0uI3UvSxx7j85ZfAoEHsFC4ooIPGumgaNgSGD6/8MwSECrqiKKnjFvSbbw6vz1JS4uSk33+fonfCCZEdiu7JLhKpNLh+PSN8vxGlyZwPiIyQ/UaK2gkwbEVGe61duxj977cf27VjBzt4bZ9AZTzzDOcpXbAgPO0TB9opqihK6vTrx7K5tWrRO37MMc42t6h26MBUio3Qf/c7x94I8Pi2bSmGS5eyZO2UKbGvPWkSz+03bVuzZizH6/adT57M4fnbtkU/5xdfAD/8EN6utWtZTMzy9tsc/eoV+4ED+XQB0L7ZoAEwdmzsz+Bm40Z+9iSm31NBVxQldQoL2ZEH0CI4erTTEehOe7z0Ejs9reXvggs4gXS9etynY0cO6DnrLAra1q3RLYkAO0MHDWJE7C5ta7GTaLgnXN68mZNVxPK3n3tueIVGgDci206A/QAjRkSmY/x86InaFmvXTqrkgQq6oiipM3Mma5oArD8+YIAj5G6nySOP0IM+axZw222xz2l96LFsi1u30ioJ+Av65s0UXXf07pc+8eLtFAWAhx/mZ7P49QMA4S6XZG2LSZY+UEFXFCV1JkwAbrmF770R69FHc0ILW1q3tJSVFFu1Yqdny5aczg1g5cLu3TmAJ56BRe7o3U/Q16xhu9wzH/lZEL14bYsA8M9/Au++G75PTg5wyimstmgLcKUaoXuLiSWAdooqipI6e/c6nX5ewezenT+AY1t89FGmaEpLOcuRFe1Nm4APP6RQxxOhuwXd1lt349cp6mdB9OIdWORuu8XaFvPz2RdgSTVC79DBKaOQIBqhK4qSOm5B90boJSWOKGdnA7t3c9TltGmR4urOt+flAZdfHllDxY0V9C5dImu2eM9nad6cN5NorpOKCv+Kj147pk2NbNjA6N12ovbpA1x/Pd/n5gL33ceiZPEycGD4DEgJoBG6oiip4xehWzG9/XZg5Eja+XJyKOh2P6/4uyPq7Gz/SaTdVFTQLz5mjP/gHb+Rot260ZETizfeYIVFN96SBtOns5N03ToWGevYEWjXjtPtWbKzgbvvjn2tAFFBVxQlddyCftFFwEknOcPh3Z1848bRLtihAwXSK/6J1jE5+2ymWsrK6Hhp2DB8uz28/pYxAAAeCUlEQVRvIjnprCygV6/I9d4IvVMnvlr7o922aRNvNM2b0xu/bh299/Xrx3f9QYOY8//00/jbbJue8BGKoiheRo2iJRGgqLZvH55OsYLavLkjutnZtAL26ePYCgsKOCw/P5/LLVqEV26MxuWX01vu5dRTWYLX7Yv/+muK8ezZ/ucqLaXH3HbUuo9zPzGMGsVzeNNGV1zBCTIACvr++wOPP175Z7Bs2kR3ThKooCuKkjr77w8cfDDfL1/OTk/3pNBW9N58k0PmAYr8YYfR7ti+PdedfTYwfz4HJwH0Ysdyubz8MtC7N28Sfi6XvDy2zZ0vLynh6M7t2/3PuX07Bfmtt8LXFxQ4HbUA8+RvvhnZCex2uVgveaK2xSRdLiroiqKkzsSJjgAuWsROz7VruewWqFdfBV54gXbCaPVX3OTmxhb0BQsoqoWFHGLvHSy0cCHwj3+ER7yVuVzKyvjqdbk89RT7AgAKdHm5My+q+3xul4sx/EnUtpgRPvREPpSiKJnD8OGcSAKIjFjPP99xfdjp2goLGfHOn8+ORTsz0KRJwPHHO3npvLzKbYsFBUzlAJHWxXnzgLvuCi+vW5kP3a73CvqkSfxx72OrQq5fz1mOgPAIHaC4p2lgUXo
7RZctS+vlFEVJE7Fsi17Xx4YNwNChnBAjK4vuFxuFr1sHfPWVs39eXuUDi+rVY80WgGkXm64B/DtF/SonurERup9t0d5c3J23tWpxcJTFHaED/IyJCPqppyY1nyiQbkHfu9e/IpqiKJmNn6BbMd26lf/zDRs6wnr33ez8tLlzr23RnqNv39juECvohx3G1Ip7ogm/8wE8X8+e4eVx3URLueTkOL53d4ReUsJrn3kmcPLJfBoRcY4bPpyjZePlzjvj39dDegW9vJyPRPbxSFGUmkGskaJ9+7LeyldfRU4aUZlt8W9/i33dRo3oF2/ThsWyvPiNFC0sDK/J4qVVK45W7dgxfL17pOh++3EgUaNGjL6HDgXq1qWgu59IAODGG2N/hgBJvw992TIVdEWpabgF/eij2SHapAmX3Z2i997LlML558ceWOQW/hkzGOF36xZ53Weecd6vWcPj3FG630jRyqhbFzj99Mj12dlO9F6rFnDQQXxv+wbttVasYHRv5zldtow3AO/TQzROPJElhCdOjL/NIdLvcikuTvslFUXZx8yfD9x/P9/n5rKj0Aq8u5MvP5+CCXDdfvsB11zjjMosLGSU67b7DR7MMrvz58duw6GHAsOGha+7/nqmfPbbz1m3dSutjOPG+Z9nyxZaKdevD1//yit01QDsZH3oITp6atVie+3N6NJLWW3ScvzxwIMPxm67m23bKp/zNArpFfSWLcMN/oqi1Axat+ZkFAAtgvfd54if24b38cec0QhghF6/Poftn3IK111zTfgIyawsDsMvKADOOSdyMug+fRyxbN480ouek8Pcvbvfzk5W8euv/p/lhx/oWJk3L3y927ny88/AkCGsCmmvY0XY63LJykq8HnpG2BZbtQKOOiqtl1QUZR9TUcHo3NZH2baNqRUbUbtTLl9/TaFfu9YppiUS2wWy//4U9Y0bWTPFzWefMcUBUNC9tsX33ousu16ZDz2abfGFFzjhs/tYey53WQCvyyUZ22JGDCwqLwfmzk3rJRVF2ceUlAD33ENxBSIF84YbgCuvDN9Wt67TyZiVxfQFwMj+5JMjr3HMMYzsx4wBvvnGWb9jhzOLULNmkRH6p59y1KqbZG2LX3wBPP98+LFWeDds4AQYwP9QhP7TT5wc1n5hiqJkPjYqtvZCr2BecYXj/LACeMstrFni3Xft2vC5PN3cdx8wfrzzlC/i2BYB/5SL32QRNvJOdKSoOwr3Ruj16jnvy8sjI/REBP2Pf3RSUAmSXpdLXh6/kJUrwwvCK4qSucyaxVdbHMtrW1y5koLXtKkjehMmcARn06YUTrdtMVp0mp/vRPoAh/mLOILety8DRjd+5zMG+MMfnGqJXqKlXNx5cu+ApaFD6Xrp25fpJ7d3fvhwpo3iZfjw+Pf1kLSgG2P2B/A8gOYABMBoEYldUswWtlm2TAVdUWoKM2cyhWKnYPOmXLp1ow1w/PhwcbVi6I58K0s3bNvGgTeXXAIceSRvItYe2LWrM1G1JVpdFFsZ0o8uXeiZ9/Oh23Z26cKnASvczz3HdX37snywG1sSIF5SGHyZSsqlDMBgEekE4AQA1xljotzyQlgbk1oXFaXmsHw5hdUK5377UXht/Ra3qF52GfDkk3zv7lB0R76xOgSzs4Gnn+ZTQYMGLF976aXctnMn19s6MEBy+ej69YFjj3Uif4u1XIrwnM2ahY+OtWI/b1542ui776KnkfzIyfEfJBUHSQu6iGwQkbmh9zsALAHQOuZBtWvT2qSCrig1h3feCS81awxF0Z16saLqdnzY7Tfe6AwaOuKI2Pnj/HwK+bp1kdu++46Dcr780lk3ZgywalXkvu3bO/ZJLz/8wOPcNwaATwbbtvHzLV5M0d2wwfksVtAvvpguH8vFF8c/a5GI/3ymcRJIp6gx5kAARwGIUjHexbhxwHXXBXFZRVGqC95o9tZbneH17qh7wQJnwgor8kOHAhdeyPd33smJI2LRujUFfe5cpl1sMS8
7At3dMWqMvzju2kWHjB+zZwP9+/vXV7csWcJBTLYsr/spIxWXi+2QrSrbojGmHoBXAdwsIhEV440x/Y0xc4wxczZt2sSpnQ4/PNXLKopSHRg5kjlir2CNGOEMEHJH6GvW8PWLL5ybwO7dzjyj8WAFfdMmet2tCNqKi24v+qhRLJzlxR1Re4nmcnnvPXam7twZ6XLJyXGePFLxoSc6BZ+HlATdGJMNivlEEXnNbx8RGS0iRSJS1LRpU9Z8mDgxchSWoiiZx1tvUVTdAgaE55RHjOCsQnY9QIGzHX9HHMERogA7O//v/2Jf84ADKLq28qG9MdSrB9SpEx5Zv/suJ9Xw4o6ovUTzoS9fznroe/ZECu/MmbyW/WzeCL26C7oxxgAYC2CJiDxa2f7/n7Iy4NprnY4RRVEyk4oKdkJ6nSVAeAQ8YAAdIIAjVNde67/vhg2stRKL0aOZbrEpk4ICvhoT6UWP1inqtiB6iWVbBHic17bodqWkknKpXRv4y1+SLpGSig+9K4DLACwwxthw+3YReTfmUfn5LGz/yisUdVuoR1GUzGLpUnYSnnhi5DYbAVdU8Gm8sJApESuuixZF7gtQTGPVPwcc8fRG6ABTQDb1AkR3zVx8sVMN0kusgUW2jd5I+oknWBvmrruAZ59lmRPL8OGRfQzRyM8HHnkkvn19SMXl8rmIGBE5XESODP3EFnPLFVfw7jplSrKXVxSlqvn2W74ee2zkNpty2bWL0aYdMu+erNm7LxDffJoLFrD64pYtwGmnhYtlz57hk0lEi9DvvdexVXrp25edno0aha93u3b692fe33bETp8OvBbKOvfq5dSpAYAePfyfYvwoL6c2JjmavmomiT75ZBaknzChSi6vKEoA5OXRf37wwZHbVq/mHKPeSLaoCBg4MDw6dqdc4vGNl5ZyYujDDwc++oh5c8uyZdxmqV2bUa8f7lmF3DRoAHToEBmhFxSwQ1aEfQZ16jhPC+7P8PHH4dNtzpnj3PwqY8UK+vhjDXyKQdUIelYWcPnlidc4UBSl+nDxxbT4+UXdtpPUb8Ygbxrk6qs54AhgNHvSSbGv2zo03MXPi/788+yAtZ2QH33kTEDtpkeP6H732bOBxx6LjJJ79WKtmUMOAd5/n/ZLdweqTRtdcEH4xBvXXx9Z8TEaVelySYkbbuBdqFat2LN6K4qSedx/P/PZXnvfxo3A2LHhk0f068cAD2BlxGgDfiy2JswNN0RWZmzenGK+ZUvscxgTvVP0gw/YMRmLL79k3txG6EGVz/Wb1DoBqk7Qmzblo8Xu3bxTptARoChKmikv55Rqj0cp3/Taa/RteyPOPXv46p4taOtWVmKNl6wsp9Nx06bwbbZD1DpdBg9mqQAv8fjQvVbMefM4ycaiRTw2K8vZZ7/9HINHKi6XjI3QLdnZwIEHsnD8yJFV3RpFUeJh1SqKZjT3ho1YmzblxBA2kvZWYgSYcunRg+/btXMmkYiFLaHrvb7tpLSDi6ZMcapBetsXy4deu3Zkgaxt2zj6ddOmyEkonnrKce4EMbAo4yJ0S3Y2f+Hnnw8MGsRRZ6NHV3WrFEWJxdKlfO3QwX+79XkXFNA1YidUtpGne+Yht7hu3Ro9cnbz+ut8so8m6DZCjzb7jztF4qW01L9cgLuKZCw3TioReuvWrPvSrl18+3tIbz30aGRncwSWnU8wN5e2IBHagbp3T7qcpKIo+4DKBN0K5vbtTFUceihtgFYE3YWv3OmPeGyLlp07w/3eANC2LV0mhx4a+3znngscdpj/ecvK/I9x+9BFnHLgAEe/v/suX996i+2wPPhg/BF6mzacyCNJqj5Ct+TlMVJfvZp1kwH2Tp91FkeZTZnCwj3HHRe7aI6bGTPY2dK9u7Munru/oiixKS6m9dBODO2loIBCvXgxcOqpdI4A0aPleCa4cPP66xwtamuwW/LyeD3brmgR+mWXRS9Re8899KF7cY8UHTEivGbMwoUcLAnQC9++vbPt+OO
dkbKVsWcPO4yT1KnqI+h+nH02vaxr19KK9OCDzLU1a8ZHmLfeYg/52WdHdo5MmEAhf/dd7l9Swjvv0Ufzj0xRlOQ59lg+RUfjrbeAadMic8J16zKfftppzr42PSMSv6D/9htf/SaPePNNXhugsDdoELlPaakz0tRL/fqONdJNfj6F2m90u33KqKjgzcZdIvzLL/nUEA8ffMBrL1gQ3/5eRCRtP8ccc4wkxe7dIlOmiKxd66x75hkRQGS//UTy8kSOPFJk61Zu+9e/uO2MM0R27HCO+flnkbp1RS691P86FRUiEyaIvPqqs/8dd4iUlCTXbkX5X2f6dP4vfvKJs+7440XOOstZnjZNZORIkfJykWuuEXnjjcrP+8knPO8HH0RuO+ookXPPjX38jTeK1K/vv23KFJHHHot9/DPPiNx2m7M8dCjbs3s3X4cOdbadc45IvNo3aRKPX7AgbDWAORKHxlbvCN1Spw7N+u67ZteuvBNv3Mh0zOLFzF8BjMovvJApG3enSdOmrMX+8svhI7kApnp69mRZgu++47o33gAeeICRvvcJwEYIlVFRwfNFG5W2r9ABW8q+wubGY/Hkk7QM+tnwZs9mJGrp3p3FurKyOLFEr16Vt8Hmzm1tdTfNmlWelo3VKfr660ypxGLGjPDSJfYJxOpCZS6XaDn1jLctJsvhh9MZk5fHNMz8+c7EGRMnsmSmu9PCMngwO10ffNBZN28ec/MzZ9J+ZGcXueYa5vW/+go44wzgl18ozFddRQ/uihXOOdas8a+/MHQo83wPPBDcZ6+M999nb7+fXas6IQJ89plTI1vJDObMYVri/fej7zNrFgMuv4Ey+fn8P7Rs3sycNWPT+NpgBd1P/G3FxfJyasSkSZH7xKq2GG3GoC1b6KyZMiUyN9+wISeCtueM5XIpLub57QQgbjLethgUHTo4TpgWLcK/UDfNmzP39+67/ENatIidKNnZjByuuy782L59mQ8sLmbP+O7dHPr7668sCyrC6L1NG6BPn/A/yLlzWVy/cWNWYXvxxdif4aefeDP58cfw9RUVwN/+xlxgv37Mx+3axW07dnB48+rVzv4LF/KP74Yb4u9dLyuLfGrZV5SXc4KD007jP8i55zp/yC+/zH4Ov2HdXrZv56ARO5GCG5Ho/7CJtjWI81iCaldVYh0usax1VjCPO44C6N7X6wF//HG6UjZv5v+e30AgL3Xr8m9/2LDIbVbQS0r4lO4OvNxtKCvzv4FEsy1WVDAAWbs20j3z5z/zf9DWjXFriDdC372b13VPU2fxjqxNkJoj6Inwt78xqm/ShOJ81VWMzjtFmeO6e3eK8Y4dFJEhQ1irYfp01o5o2pSRwquvcvZvy9ix3LZoEYXYXYEN4DmvvJLn/OUX/lEPHUqhc0et114L/OtfnG7rlVe4ffJkbps2jU8Phx/OGwjA6GfsWOCbbyiQXtasYXuXLOG+q1axYFJREXvYS0ootA8/zJtLvDcFy08/RaakrGDv2UP72kknMU02cCAdAHv3ci7HPn1YyGjGjMhzTp/uLO/cyVF711/vVLmz51+/nrPMt2zJm52X776Lb2RiWRl/J126BOOOKi9nB37nztE75JJlwQJ2EHqDgWSIla4rK+PfeEEBBwRGIyeHQcXUqUyXujsmt20Lnw8hO5t/Y7YESLzzadar5x+4NW/Ov2H39HB+7bOfx0s8tsVonbf2f8WdcvFG6EcdBdx+O5903CUQADpihg/378iNh3gS7UH9JN0pui8oL0/8mNLS8OO7dhVp0YKdpuXlIqedJpKfL/LOO84+K1dGXnfBApEnn3Q6dSsquO3JJ0VefpnrWrUSWbSI6999V+TBB7nfzp0ir78usmoVt23ZwuU2bURyckSGD+d+5eUiRx8tcsAB7KgpLRX57Tce07u3SJ067LAqKBBp0oRtufNObl+1ip/NPgTn5op07Oh8LsuePSKvvSZyww0i/fs767t1E6lVS6RTJ3YGtW7NjiH
Lvfey83n79vDv9oQT2FE1b174dfbuZXsKCvh5H36Yny0rS2TiRK4TEXnxRX4PTZuy8/v440WKi7ltwQKRL77gZwf4e7PXmTqV7X/2WZG5c9nhVVbGbZdfzv0fftjnDyIKO3eKjB/P77NfP37WPXvCzzd4cPzn++03kRdeELn7bp5v5Mjwzv6VK0VatuR5Dz5YZMOG+M8t4vz9ifB337gxO/V2747c9447eJ3nn499zn/8g/v16hW5DeDvyDJsGNcVF/N17NjE2u/l559Fli8X2biR53viich9PvmE36ef4aFXL5osvOzaxfM9+CD3OftsZ9u0aezoXbNG5LPP+GpZvFhkzhznuqtXi3z/Pc81bBi//1tu4f/It9/6fiTE2Sn6vyvoQbB0KZ00P/3E5TVrRBo2pKi7xcrNQw+JZGfzq7/gAkdk3XzxBc9x//3xt2X5ckeAbQ/5J5+I3HMP/zFXraIbqEsX549SRGTyZC5feWX4P7YI/+ieflrkr3/lzWrJEq4fO1bkpJMosIBIvXo8rz3+ww8pZr16ifTsKXLVVbzRxGLsWJ7rxReddVOmiFx2mciZZ3Lbyy/z+zrmGBFjKOZuZs0SadtW5LDD+E/k5qSTnLbedhudEEuXinz9Ndfn5TnfHyAye7Zz7IUX8gbovTlbFi8W+fJLR2QHDOA5atUSadTIEVobRPTvz21R/nll+XL+Dfz6a/j5jKHYAiIHHcTz7dxJ8WnQgDeRoqJwMYnFzJki7drxpn/jjRS3igqRQYN4jf3352cfMIC/+x07eMO8+urKz22Dj2ifb/NmZ9m60ubOje9mES8//MDzjRyZ2HG7d/v//5aVMVDIzhZZvz582wsv8FrLlkU/b2kpv7/TTuPyqaeKnH8+NcH9NzhmTETAqYJeVSxcyH8UrzhatmwROeUUClUsO+S6dU7kGS8rV4q8/bb/tu3bRW6+WeSII0ROPplRr2XFisSeWMaP5x9lv360jbmfXJJlypRw0d+2jX/whYWM2t02sF27IgXbUlrq/1lmzxZ56SVGbSLhvx97o1i6lP+YS5eGH7t6NW+w7mjzgQdEhgyhmNqbwF13cdu6dbyplZbyOtOni4wb5xy7datIs2a8Udl23Hcfo197wwWcG/qKFbTy7d3L/WfO5GextG0r8vnn4Z9r507u4/6cboEdNUqkdm3eGHr35lPh5MnO9o8+YsR46KFs66xZXL95s3/kngrjx/PzjhoVeVNPhaVL+RkWLvTfPm4cb07vvScyYwaf0Pyiee85H388cv3LL7PtZ53FG6D7b+izz/j/fv753Mfaordu5dPEfvvRSr11q8hf/iJy0UUq6IqyT/n3v0V+/3tnuVUr/vsUFfEf/PXXowuHH6+9xvSLpVkznq9zZ0Zsb74ZeWPxo6LC/8b/2GNOlH3uubw5tmjh3Hx79eIT4i+/cDnajTAd7N1LMV2/nqmHaE8uQfPAA3xicz+ZHXdccgFKcbHI4YczAm/Xjk9Ylmuv5bmbNRP5+9+ddJ5l8WInJReFeAXdcN/0UFRUJHPmzEnb9RRln+Kd3b06na+8nKOlp0+n60kEuOQSVjLMz+dkzC1aaI2k3bvpXMnOpkmiU6f4O2XjZeNGOsi6dEn63MaYb0SkqNL9VNAVRVGqN/EK+v+mbVFRFKUGooKuKIpSQ1BBVxRFqSGooCuKotQQVNAVRVFqCCroiqIoNQQVdEVRlBqCCrqiKEoNISVBN8b0NMYUG2OWG2NuC6pRiqIoSuIkLejGmFoAngZwNoBOAC41xkQpKK4oiqLsa1KJ0I8DsFxEVohICYCXAFwQTLMURVGUREmlCk1rAO7JINcCON67kzGmP4D+ocW9xpiFKVyzKmgCYHNVNyIBMq29gLY5HWRae4HMa/O+bG+beHYKuKxYJCIyGsBoADDGzImnwEx1ItPanGntBbTN6SDT2gtkXpurQ3tTSbmsA7C/a7kwtE5RFEWpAlIR9K8B/M4Y09YYkwOgD4A
3g2mWoiiKkihJp1xEpMwYcz2A9wHUAjBORBZVctjoZK9XhWRamzOtvYC2OR1kWnuBzGtzlbc3rRNcKIqiKPsOHSmqKIpSQ1BBVxRFqSGkRdAzoUSAMWZ/Y8wMY8xiY8wiY8xNofWNjDHTjDHfh14bVnVb3RhjahljvjXGvB1abmuMmR36rl8OdVhXG4wxDYwxk40xS40xS4wxXTLgO74l9Dex0BjzojEmr7p9z8aYccaYn93jPKJ9r4Y8EWr7d8aYo6tJe4eH/i6+M8a8boxp4No2JNTeYmNMj3S3N1qbXdsGG2PEGNMktFwl3/E+F/QMKhFQBmCwiHQCcAKA60LtvA3AhyLyOwAfhparEzcBWOJafhjAYyJyMIBfAFxdJa2KzuMA3hORDgCOANtebb9jY0xrADcCKBKRQ0EDQB9Uv+/5OQA9Peuifa9nA/hd6Kc/gJFpaqOb5xDZ3mkADhWRwwEsAzAEAEL/h30AdA4d80xIV9LNc4hsM4wx+wM4C8Bq1+qq+Y5FZJ/+AOgC4H3X8hAAQ/b1dQNo9xsAzgRQDKBlaF1LAMVV3TZXGwvBf9TTAbwNwIAj1Wr7ffdV/QOgPoCVCHXGu9ZX5+/YjohuBLrC3gbQozp+zwAOBLCwsu8VwLMALvXbryrb69nWG8DE0PswzQCddV2qw3ccWjcZDE5+BNCkKr/jdKRc/EoEtE7DdZPGGHMggKMAzAbQXEQ2hDb9BKB5FTXLjxEAbgVQEVpuDGCbiJSFlqvbd90WwCYA40Npon8bY/JRjb9jEVkH4F9g9LUBwK8AvkH1/p4t0b7XTPif7Adgauh9tW2vMeYCAOtEZL5nU5W0WTtFPRhj6gF4FcDNIrLdvU14q60WPk9jzHkAfhaRb6q6LQlQG8DRAEaKyFEAdsGTXqlO3zEAhPLOF4A3o1YA8uHz2F3dqW7fayyMMXeAKdCJVd2WWBhj6gK4HcDdVd0WSzoEPWNKBBhjskExnygir4VWbzTGtAxtbwng56pqn4euAHoZY34EK12eDuanGxhj7ICx6vZdrwWwVkRmh5YngwJfXb9jAOgOYKWIbBKRUgCvgd99df6eLdG+12r7P2mMuRLAeQD6hm5CQPVtbzvwRj8/9H9YCGCuMaYFqqjN6RD0jCgRYIwxAMYCWCIij7o2vQngitD7K8DcepUjIkNEpFBEDgS/049EpC+AGQAuCe1WbdoLACLyE4A1xpj2oVVnAFiMavodh1gN4ARjTN3Q34htc7X9nl1E+17fBHB5yIlxAoBfXamZKsMY0xNMIfYSkd2uTW8C6GOMyTXGtAU7Gr+qija6EZEFItJMRA4M/R+uBXB06O+8ar7jNHUknAP2Wv8A4I6q6MyIo40ngY+k3wGYF/o5B8xLfwjgewDTATSq6rb6tL0bgLdD7w8C/9iXA3gFQG5Vt8/T1iMBzAl9z1MANKzu3zGA+wAsBbAQwH8A5Fa37xnAi2COvxQUlqujfa9g5/nTof/HBaCDpzq0dzmYd7b/f6Nc+98Ram8xgLOry3fs2f4jnE7RKvmOdei/oihKDUE7RRVFUWoIKuiKoig1BBV0RVGUGoIKuqIoSg1BBV1RFKWGoIKuKIpSQ1BBVxRFqSH8P0Y5ERqKqIx1AAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "150" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Plot loss and accuracy \n", "episodes = list(range(0, 150))\n", "\n", "loss = 
[2.750,1.9326,1.599,1.4933,1.3641,1.3544,1.2858,1.2228,1.2695,1.165,1.2079,\n", " 1.0799,1.1265,1.242,1.2760,1.219,0.966,0.9580,1.0700,1.1492,1.1680,1.0672,\n", " 0.9353,1.0579,0.8656,1.0098,0.837,0.8859,1.0678,0.9264,0.8049,0.9255,0.8928,\n", " 0.9739,0.8459,0.8857,0.9969,0.9506,0.9345,0.960219,0.831869,1.0556, 0.8367,\n", " 0.8366,0.8056,1.006,0.8444,0.983,0.9342,0.8919,0.8765,0.8173,0.8173,0.8597,\n", " 0.877,0.8935,0.8714,0.8827,0.9247,0.9506,1.0173,0.8194,0.9933,0.8126,0.8047,\n", " 0.9586,0.8423,0.969,0.9432,0.8538,0.9458,0.8530,0.8371,0.9035,0.7926,0.8675,\n", " 0.8354,0.8754, 0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,\n", " 0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,0.7918,0.9274,\n", " 0.7951, 0.7180,0.6970,0.7397, 0.7380,0.8324,0.7980,0.8889, 0.8174, 0.8040,0.796,\n", " 0.9588, 1.750, 2.289, 6.869,0.908,0.927,6.778,0.895,6.555,0.788,0.701,6.674,2.567,\n", " 2.43,2.234,1.070,3.657,2.296,0.889,0.8122,6.668,6.516,0.774, 0.730,0.739,0.831,7.99,\n", " 6.665,0.847,0.801,6.596,0.756,0.754,13.629,0.728,0.771,1.055,0.734,0.879,0.810,6.769]\n", "\n", "plt.plot(episodes, loss, 'r--')\n", "plt.axis([0, 150, 0, 12])\n", "plt.show()\n", "len(loss)\n" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3XmYFOW1x/HvERAEFxQnSkDFGNSgN2qcENyNKxoDiZobokZNVNRHr5p4r49bcpXodUsUjVuIG+4aRUFEjXHfdVBAFpcRBUQQBAURZZtz/zjd6ZmmZ7pmpmd6pvh9nqef6ap6u+pUV8+pt956q8rcHRERSZe1yh2AiIiUnpK7iEgKKbmLiKSQkruISAopuYuIpJCSu4hICiVO7mbWwczeMrOxBaZ1NrP7zKzazF4zsz6lDFJERBqnMTX304Fp9Uw7Dvjc3b8LXAVc1tzARESk6RIldzPrDfwEuKmeIoOBkZn3DwD7mpk1PzwREWmKjgnLDQfOAtarZ3ovYBaAu680s0VAD+Cz2oXMbCgwFKBbt247b7vttk2JWURkjTV+/PjP3L2iWLmiyd3MDgHmuft4M9u7OUG5+whgBEBlZaVXVVU1Z3YiImscM5uRpFySZpndgEFm9hFwL7CPmd2ZV2Y2sFlmwR2BDYAFiaMVEZGSKprc3f0cd+/t7n2AIcDT7n5UXrExwDGZ94dnyuiOZCIiZZK0zX01ZjYMqHL3McDNwB1mVg0sJHYCIiJSJo1K7u7+LPBs5v0fa43/BvhFKQMTEZGm0xWqIiIppOQuIpJCSu4iIimk5C4ikkJK7iIiKaTkLiKSQkruIiIppOQuIpJCSu4iIimk5C4ikkJK7iIiKaTkLiKSQkruIiIppOQuIpJCSu4iIimk5C4ikkJK7iIiKaTkLiKSQkWTu5l1MbPXzWyimU0xswsLlDnWzOab2YTM6/iWCVdERJJI8gzVZcA+7r7EzDoBL5rZY+7+al65+9z91NKHKCIijVU0ubu7A0syg50yL2/JoEREpHkStbmbWQczmwDMA55099cKFDvMzCaZ2QNmtllJoxQRkUZJlNzdfZW77wj0Bvqb2fZ5RR4B+rj794EngZGF5mNmQ82sysyq5s+f35y4RUSkAY3qLePuXwDPAAPzxi9w92WZwZuAnev5/Ah3r3T3yoqKiqbEKyIiCSTpLVNhZt0z79cB9gfeySvTs9bgIGBaKYMUEZHGSdJbpicw0sw6EDuD+919rJkNA6rcfQxwmpkNAlYCC4FjWypgEREpzqIzTOurrKz0qqqqsixbRKS9MrPx7l5ZrJyuUBURSSEldxGRFFJyFxFJISV3EZEUUnIXEUkhJXcRkRRSchcRSSEldxGRFFJyFxFJISV3EZEUUnIXEUkhJXcRkRRSchcRSSEldxGRFFJyFxFJISV3EZEUUnIXEUkhJXcRkRRSchcRSaGiyd3MupjZ62Y20cymmNmFBcp0NrP7zKzazF4zsz4tEayIiCSTpOa+DNjH3XcAdgQGmtmAvDLHAZ+7+3eBq4DLShumiIg0RtHk7mFJZrBT5uV5xQYDIzPvHwD2NTMrWZQiItIoidrczayDmU0A5gFPuvtreUV6AbMA3H0lsAjoUWA+Q82sysyq5s+f37zIRUSkXomSu7uvcvcdgd5AfzPbvikLc/cR7l7p7pUVFRVNmYWIiCTQqN4y7v4F8AwwMG/SbGAzADPrCGwALChFgCIi0nhJestUmFn3zPt1gP2Bd/KKjQGOybw/HHja3fPb5UVEpJV0TFCmJzDSzDoQO4P73X2smQ0Dqtx9DHAzcIeZVQMLgSEtFrGIiBRVNLm7+yRgpwLj/1jr/TfAL0obmoiINJWuUBURSSEldxGRFFJyFxFJISV3EZEUUnIXEUkhJXcRkRRSchcRSaHyJffq6rItWkQk7cqX3Bctgq+/LtviRUTSrLzNMp9/XtbFi4ikVXmT+8KFZV28iEhaqeY
uIpJC5UvunTtDTU3ZFi8ikmblS+7bbw977VW2xYuIpJn6uYuIpFD5kvt778H115dt8SIiaVa+5L50KUyZUrbFi4ikWfmSe8eO6i0jItJCkjwgezMze8bMpprZFDM7vUCZvc1skZlNyLz+WGhedXTooH7uIiItJMkDslcCZ7r7m2a2HjDezJ5096l55V5w90OSL1k1dxGRllK05u7uc9z9zcz7L4FpQK9mL7lrV+jbt9mzERGR1TWqzd3M+gA7Aa8VmLyLmU00s8fMbLt6Pj/UzKrMrGr+2mvDnXc2OmARESkucXI3s3WBB4Ez3H1x3uQ3gS3cfQfgr8DDhebh7iPcvdLdKysqKpoas4iIFJEouZtZJyKx3+Xuo/Knu/tid1+SeT8O6GRmGzc404ULYZtt4ta/IiJSUkl6yxhwMzDN3a+sp8ymmXKYWf/MfBc0OGP3uJBJPWZEREouSW+Z3YBfA2+b2YTMuHOBzQHc/UbgcOBkM1sJfA0McXdvcK4dOsTfzz+HLbdsSuwiIlKPosnd3V8ErEiZa4FrG7fkzKJVcxcRKbnyXaFau+YuIiIlVb7k3qkTHHAAbLhh2UIQEUmrJG3uLbTkjvDEE2VbvIhImul+7iIiKVTe5D5gAJy+2n3IRESkmcqb3JcsgY8/LmsIIiJpVN7kvuGG6gopItICypvcN9pIXSFFRFqAau4iIilUvq6QAHvuCd26lTUEEZE0Km9y/+1v4yUiIiVV/n7u7vESEZGSKW9yHzUKOneGd98taxgiImlT3uTetSusWKEeMyIiJVb+rpCgHjMiIiVW/q6QoJq7iEiJqeYuIpJC5U3u3bvDiSfCdtuVNQwRkbRJ8oDszczsGTObamZTzGy12zhauMbMqs1skpn9INHSO3SAG2+EffdtQugiIlKfJDX3lcCZ7t4PGACcYmb98socBPTNvIYCNySOYOVK+OqrxMVFRKS4osnd3ee4+5uZ918C04BeecUGA7d7eBXobmY9E0UwYAD88peNi1pERBrUqDZ3M+sD7AS8ljepFzCr1vDHrL4DwMyGmlmVmVXNnz8/RnbvrhOqIiIllji5m9m6wIPAGe6+uCkLc/cR7l7p7pUVFRUxUrf9FREpuUTJ3cw6EYn9LncfVaDIbGCzWsO9M+OK021/RURKLklvGQNuBqa5+5X1FBsDHJ3pNTMAWOTucxJFkK2519QkjVlERIpIcsvf3YBfA2+b2YTMuHOBzQHc/UZgHHAwUA0sBX6TOIIDDoD114fly6FLl0aELiIi9TEv0+12KysrvaqqqizLFhFpr8xsvLtXFitX/vu5A8ydCzNmlDsKEZHUaBvJvbISzj+/3FGIiKRG20ju220HU6aUOwoRkdRoO8l92jRYtarckYiIpELbSe7ffAMffljuSEREUqHtJHeAqVPLG4eISEq0jeT+H/8BI0fGiVUREWm2JBcxtbxu3eDoo8sdhYhIarSNmjvAe+/B6NHljkJEJBXaTnIfMSLu664eMyIizdZ2kvt228GyZTB9erkjERFp99pWcgddzCQiUgJtJ7n3yzyWVcldRKTZ2k5yX3dd2GILJXcRkRJoG10hs0aPhp7JnqstIiL1a1vJfYcdyh2BiEgqtJ1mGYhH7V1yCVx7bbkjERFp19pWcl9rLXjpJfjDH+K5qiIi0iRJHpB9i5nNM7PJ9Uzf28wWmdmEzOuPzYro4ovhiy/giiuaNRsRkTVZkpr7bcDAImVecPcdM69hzYpohx3giCNg+HCYM6dZsxIRWVMVTe7u/jywsBViyRk2DFasgIsuatXFioikRana3Hcxs4lm9piZbVdfITMbamZVZlY1f/78+ue21VZw5ZVw1FElCk9EZM1i7l68kFkfYKy7b19g2vpAjbsvMbODgavdvW+xeVZWVnpVVVXjIxYRWYOZ2Xh3L/rwi2bX3N19sbsvybwfB3Qys42bO18AZsyA//5vWLq0JLMTEVlTNDu5m9mmZmaZ9/0z81zQ3Pk
Ckdz/8he4666SzE5EZE2RpCvkPcArwDZm9rGZHWdmJ5nZSZkihwOTzWwicA0wxJO09SSxxx6w445w9dVQolmKiKwJit5+wN1/VWT6tUDLXFJqBqefDr/5DTz9NOy7b4ssRkQkbdrWFaqFDBkCFRVwzTXljkREpN1o+8m9Sxc46STYdddompkxA84+WydZRUQa0LbuClmf886Le82Ywfvvw2WXRZK/++4YJyIidbT9mjtA586w6abxfr/94P/+D+69N5K8iIispn0k93xnnx1t8eeeC48+Wu5oRETanPaZ3M3g5pthp53g/PPjPvAiIvJv7TO5A3TtGo/le/zxuA98bVVVze8XP2cO3HlnbnjZsubNT0SkFbXf5A7QuzdsskncQfLCC+Ok60UXwQ9/WDcxN9aIEbDNNnDiifDZZ3DddVBZCQ3d7ExEpA1p38k9a+LEeMjH1lvHU5yOPBIGDoShQ6N23xiTJkVS/+EPY74bbwz9+sEHH8RFVJ991jLrICJSQulI7pWVcOutsGRJJPk77oANN4QXXoiTrytXRrmvv44a/uzZ9c/r4othvfXggQfgu9+NcT/+MYwZA+++G238IiJtXDqSO0RtffHi6EFjBh07RqJ+5x0YOTLKdO4MF1wQtftC5s2Dhx+GU0+NnUNt++0H++8Pzz/foqshIlIK6UnuAJ061R3++c/hRz+C44+HyZPjxOuJJ8bFT4Xaz7/1rdgZnHlm4fnvskvU+ht7dezy5TBuHLz8cuM+JyLSROlK7vnMchc63Xhj/D399Oj58re/1S2bbbrZckvo0aPw/H73O1i4MHrqJDFxIhx7bJz0/clP4MAD4dNPG70aIiKNle7kDrDXXjBrVu7GY9/7Xpxsve66qFFnnXpq1PQb6kLZtSt06JB82WPGwIMPwuDBcMstsbzHHmvaeoiINEL6kztEl8nafeHPOCNuSDZ9egyPGAE33QQ9exa/V82ll8aOIIk//CGaf267LW5b/OGHUZMXEWlha0Zyz3fAAVBdHb1hTjst2uH33z8SdzEffRR96Bu6KvaOO+DFF+N9ly658d/+dvzN7lRERFrImpnczaJ55d574a9/jbb0sWNh/fWLf3bAAFi0KE68FjJ5MpxwAlxxReHp48bFTuXpp5sev4hIEWtmcodoW3/uubhHzZVXJm9L32WX+PvKK4XnecIJsMEG8Pe/F/78PvvA5pvDWWet3r4/cyY88UTydajt5Zej+emjj5r2eRFJlSTPUL3FzOaZ2eR6ppuZXWNm1WY2ycx+UPowW4BZJODf/rZxn9t66+gDXyi5P/44vPpq9K//1rcKf75LFzjnHBg/Hl56qe60HXaIk71ffNG4mCCOFGbPjp45bdGjj6qnkEgrSlJzvw0Y2MD0g4C+mddQ4Ibmh9WGmcGhh8JGG9Ud7x4XSPXpA8cc0/A8jjoKuneHa/MePXvccfH3n/9sXEw1NbFTOfxwWGedxn22NSxcCD/9aZy4TruZM+Grr8odhUjx5O7uzwMLGygyGLjdw6tAdzPrWaoA26SbboLLL687rqYmkvall65+MVW+bt3iiGHcOPjyy9z4yy6LnUb+PeqfegoGDYobpBVSVQVz58a9b/7nf+IIoi154YXY+d19dzSFJfHKK3BDO6snvPEGbLFFXCUtUm7uXvQF9AEm1zNtLLB7reGngMp6yg4FqoCqzTff3Nu9FSua/tl589znz4/3M2e6/+//ui9Y4H7EEe4VFe6rVuXKmrmD+4cfFp7Xuee6d+gQ8+zVy/2gg5oeVzEff+z+2WeN+8wZZ0T84P773yf7TLb87NmNj7EcZs1y79kzYp47d/XpQ4a4X3RR68clqQNUeYK83aonVN19hLtXuntlRUVFay66tJYtixraMcfEvWZGjYrafH0160IqKuKOkwDDh8etihcvjhOyF1yQm1d1daS5iy+OJp9Ctt8++u5XVMQRweOPR/NAUp98Eid6G7o9wrRpcXTyi19A//4wZUr
hcl99VffiMIBnn4357703PPNM8Xiyt3fo0gXWXbfutKuuavydPlvaV1/FkdWSJfD226uf95g5M3pmFXuwTHaXVt+0efNKF7OkX5I9AA3X3P8G/KrW8LtAz2Lz3HnnnVt299bS9twzV7sE9z593Jcta9w85s5133bb+PyQIYXLnHOO+1prRY35q6/cV65seJ4ffRQ1/T/+MVkMNTXuBx4YMbz8cuEy8+e7r7uu+1lnub/yivsmm7ivt577JZe4v/12zMPd/fzz3ddZx/1Xv8p9duHCiGfYsHiZxRFKQ8aNi3gef7zu+M8/j/mfcIL70qVxtPP118nWsyX913/FNnr00TiaO/FE91tvzU0fPTr3O3n11frnc8QR7rvvnvs+a/vXv9zXXtv9+OPd33+/5KtQ1KpV7g88EL/B1jBxYvy2ZDUkrLmXIrn/BHgMMGAA8HqSebb75L5ypfsHH7g/8YT7dde5jx/f+HmsWpX7p6/9+blz3R96KJbRs6f7IYe4v/SSe9eu7s89V3ceU6eu3kwycKB7796Fm42mTau7g7jhhlj+tdfWH+fZZ0dSnjIlhmfNct9jj1zs//pXjL/oIvftt49EV7sJ6cMP3efMcX/hhSg/alTD38vpp7t36eL+6afuf/tb7LDc3a+5Jj5fVeU+Zky833XX8jTdTJiQa1Z75JH4DWT16+e+//51y8+a5d6xY6xDfU45Jdbpqady4154IX4L06e7n3xyfC8bbOD+ySd1P9vSifC88yK2yy9v2eW4x+82+9tqbStWxPdfaAfbRpQsuQP3AHOAFcDHwHHAScBJmekGXAd8ALxNPe3t+a92n9xL5bHH3C+8sO64iy7Ktd1OmhSJZPFi986d3U87rW7ZPfd033HHuuNGj3Y/8sio6db2xBMx3+9/PxLie+/FDuOAA+LHPHFi3Rqne7Tjd+tW+Mhi1iz3v/+9bu151qxo/z/zzNXLL1vm/r3vud9xR4NfiS9eHEcRM2ZEvBdfHPFtt517ZWWu3D/+EbFVVLg/+WTD8yyVlSvd//Sn2IHVd3R05plRy16ypG6SKHau4uuvY2e+zz65cXvt5b755rkd9bRp8Tv4z//MlckeGTzyiPvddzd8dFf7XE5S48blzvs8+2zjP99Y2e0OcT6qNWUrEI880rrLbYSS1txb4qXk3oA334xNk59oBw2KGnn2H/SzzyLJnH9+svnuvXc0qfz855EALrnEfcMNo8nH3f2kkyJxfPpp7jNnnRX/2FOnJo//l7907949ktVZZzXvH2W33eJo4MUX4zu56aa606dOjaRv5j5iRNOX4x7fyYsvRu305ZdXr73Nneu+334RxxFHrL7zzHryySgzdmzEVFlZvCnq/PPj6OzPf/Z/N5FlfwdXXFG37LBh7p06uVdXRzPJFlvEdzBqVJS/4YbCy7j66tgRvvdeoq/D3eOIYcMNo0LQWk0y7lGpgag8tJaamlwz6YEHtt5yG0nJvT2rqcnVlN59Nzd+5Ej/d7vtihXuxx0Xw2+8UXg+EybkDtdfey3K/uUvdcvUbtKYOjXKDBuWG7fPPpHIGuPdd6MJZ8GCXHt7/vrVV7scPTpqxtnzF9deGzH9+c/u/ftHbTjfkiXRZJFNWk8+GUc4SZJRdke5fLn7ppvmaowQCfPRR2P6ww9Hc0iXLpFwGjps/+abOCI65ZRIElttFeW/+Saa2PKbZt55J7eOX37p3qNHnLf49a/jyCR/J/LNN1GDd4+eUuD+/POxjB//OJLxvHl1PzNmTO43deWVq8e8dGnhdXnoofheqqtj/pMnR3NkqeR/j59+6v7FFzG+Vy/3ww4r3bIKLa+25cvdhw+PSlD+/14bouTe3vXvv/oPbOHCaLf9/e8jSUC0hxf6wS5bFrW0Qw6J4RNOiNr04sUNL3fgwFhGtjY/c2bdmnxjPPxwLvFkTZ8eyeKeewp/5mc/i2aI7DrNnRtHJ+eem3y5118fnznwwEiEhUyd6n7wwXWbQC6
91P2++3LNTf37507qTpjg/pvfRNNVEscfHydaO3aMo5esHXeMo5Hazjsv4s22o7/6apw07dRp9Wa42rI7haOPzo2bMiWWeeSRuR3hhAmxk9h558KJ+euvY6dw1FG57V5dnZue3UkuXRo7tzPOSPYd1GfWrNjBHXJI/BbeeSc37eST3TfeOGJ68033RYuatyz3mM9JJ7l/5zvxP/HMMw2XnzMnfm9N/d23MCX39m7mzOidkO+WW+KQ9YIL6j/8zrrwwtjEkydHrWTChOLLffzx+Exzt8+8ebkacO0Eu2KF+/rrRzIZPjyaCrI9SJYvj144Q4fWndcBB9RtY07i5ptj2YcdtvqJ5TvvjGTXo0ck3oZqc805sXb77bkjrawLLogadDZxrFoVzSr5zQAvv+zet2/dJJvvttuimS2/X/3ZZ8dys9/ZqadGLbj2UVp+4rr//jhP0LVrfGdrreX+z3+uvsyBA9232abB1W7Q7NnuG20U8W21VRxJ1t6Rd+vmfuyxTZ9/IaNGRW+vQYOi2aVz5zhCrO2TT6LJrzWbnppIyV2iTb5r17o1u2JqaqLnR5IdQUOWL4+fV8+eq0878kiv0/yRrdk/+2wMP/hg3fJN7e541VUxv5NPzo0bOjTG7bFHrpbaUgYPjqRS+yTmW2/F8rPnB557LobvvHP1zxfbsdTUFG5OWbUqmqayO5VVq+qemLz//jgqqKqKpqNsop8+PRI7xJFHoSaw4cNj+vTpDcdWX7wHHhjdWfN7l11/fST2bG+orMsvjx11U2R7My1bFr9H9/if6N8/jtJqy1aEsjvTmppoyho7tmnLbkFK7hKOPz4280MPtf6yp08v3E1x1apoYlqwIP7Zskmsd++I9YsvShfDn/7k/tOf5oa//e2o2Tbn6uKkDj00mlxqq6mJayLWWSe+h3/8w33rrQsn0paSvW5hvfXi+37iibrTFy6s/7PZpqBiR42FrFoVzTGFTpI+/3wc0eU3We2+e9OOIj/5JI7M8s8xuceRZPY3N2JEnNfp1avu0VNNTSy3X7821y1SyV3CrFmR3GbMKHckxWUvdCq12ucZGkpcrWXChLoJrhzJ45xz/N/nbBoju3MaPLjhcuPHx/mFPfeMpsRi53rco1kmv1dR7W7BSTz0UJxsHjgwdqDFTopmz13B6k01t94a4++6K9myW4mSu4jUb+nSOGpoyhHMW281fKKzpib65/fuHecNssmz0DmkYsaPj8/efnvh6dOnR/fUjz6KdenRI7e8hi7Mqx3r66/HxXL51wCsWBEXya2/fml7CDVT0uRuUbb1VVZWelVVVVmWLSLNtHJlPFD+88/jyWQbbBCPkTz0UBgyJJ4tsPbacf+kV16Je+sMGgT77de45dTUxLON118f3n8/xh15ZCxz003h4YcjltGjYa+9YM6ceAzm4sUwbFjxZyIXM2NGPGdh223j0ZkdOzZvfiVgZuPdvbJYufJHKiLtT8eO8fD3rl1hq60i2U6aBDvtFNN79cqV3XXXeDXFWmvFTfEmTsyN69AhbnZXVQXf+Q7cdRf07RvTevaM216XyhZbwC23xM3h2kBibwzV3EVE2pGkNfc19xmqIiIppuQuIpJCSu4iIimk5C4ikkJK7iIiKaTkLiKSQkruIiIppOQuIpJCiZK7mQ00s3fNrNrMzi4w/Vgzm29mEzKv40sfqoiIJFX0eloz60A8AHt/4gHZb5jZGHefmlf0Pnc/tQViFBGRRkpSc+8PVLv7dHdfDtwLDG7ZsEREpDmSJPdewKxawx9nxuU7zMwmmdkDZrZZSaITEZEmKdUJ1UeAPu7+feBJYGShQmY21MyqzKxq/vz5JVq0iIjkS5LcZwO1a+K9M+P+zd0XuPuyzOBNwM6FZuTuI9y90t0rKyoqmhKviIgkkCS5vwH0NbMtzWxtYAgwpnYBM+tZa3AQMK10IYqISGMV7S3j7ivN7FTgCaADcIu7TzGzYcTjnsYAp5nZIGAlsBA4tgV
jFhGRIvSwDhGRdkQP6xARWYMpuYuIpJCSu4hICim5i4ikkJK7iEgKKbmLiKSQkruISAopuYuIpJCSu4hICim5i4ikkJK7iEgKKbmLiKSQkruISAopuYuIpJCSu4hICim5i4ikkJK7iEgKKbmLiKSQkruISAolSu5mNtDM3jWzajM7u8D0zmZ2X2b6a2bWp9SBiohIckWTu5l1AK4DDgL6Ab8ys355xY4DPnf37wJXAZeVOlAREUkuSc29P1Dt7tPdfTlwLzA4r8xgYGTm/QPAvmZmpQtTREQao2OCMr2AWbWGPwZ+VF8Zd19pZouAHsBntQuZ2VBgaGZwmZlNbkrQ7djG5H0nawCt85pB69x6tkhSKElyLxl3HwGMADCzKnevbM3ll5vWec2gdV4ztPV1TtIsMxvYrNZw78y4gmXMrCOwAbCgFAGKiEjjJUnubwB9zWxLM1sbGAKMySszBjgm8/5w4Gl399KFKSIijVG0WSbThn4q8ATQAbjF3aeY2TCgyt3HADcDd5hZNbCQ2AEUM6IZcbdXWuc1g9Z5zdCm19lUwRYRSR9doSoikkJK7iIiKVSW5F7sdgZpYGabmdkzZjbVzKaY2emZ8RuZ2ZNm9n7m74bljrWUzKyDmb1lZmMzw1tmbklRnblFxdrljrGUzKy7mT1gZu+Y2TQz22UN2Ma/y/ymJ5vZPWbWJW3b2cxuMbN5ta/FqW+7Wrgms+6TzOwH5Ys8p9WTe8LbGaTBSuBMd+8HDABOyazn2cBT7t4XeCoznCanA9NqDV8GXJW5NcXnxK0q0uRq4HF33xbYgVj31G5jM+sFnAZUuvv2RCeLIaRvO98GDMwbV992PQjom3kNBW5opRgbVI6ae5LbGbR77j7H3d/MvP+S+KfvRd1bNYwEflaeCEvPzHoDPwFuygwbsA9xSwpI3/puAOxJ9BbD3Ze7+xekeBtndATWyVzT0hWYQ8q2s7s/T/T8q62+7ToYuN3Dq0B3M+vZOpHWrxzJvdDtDHqVIY5Wk7lL5k7Aa8Am7j4nM2kusEmZwmoJw4GzgJrMcA/gC3dfmRlO27beEpgP3JppirrJzLqR4m3s7rOBPwMziaS+CBhPurdzVn3btU3mNJ15dPedAAABqElEQVRQbWFmti7wIHCGuy+uPS1zoVcq+qKa2SHAPHcfX+5YWlFH4AfADe6+E/AVeU0wadrGAJl25sHEju3bQDdWb75IvfawXcuR3JPcziAVzKwTkdjvcvdRmdGfZg/ZMn/nlSu+EtsNGGRmHxFNbfsQ7dHdM4fvkL5t/THwsbu/lhl+gEj2ad3GAPsBH7r7fHdfAYwitn2at3NWfdu1Tea0ciT3JLczaPcy7c03A9Pc/cpak2rfquEYYHRrx9YS3P0cd+/t7n2Ibfq0ux8JPEPckgJStL4A7j4XmGVm22RG7QtMJaXbOGMmMMDMumZ+49l1Tu12rqW+7ToGODrTa2YAsKhW8035uHurv4CDgfeAD4DzyhFDK6zj7sRh2yRgQuZ1MNEO/RTwPvAvYKNyx9oC6743MDbz/jvA60A18A+gc7njK/G67ghUZbbzw8CGad/GwIXAO8Bk4A6gc9q2M3APcU5hBXGEdlx92xUwogfgB8DbRE+isq+Dbj8gIpJCOqEqIpJCSu4iIimk5C4ikkJK7iIiKaTkLiKSQkruIiIppOQuIpJC/w9HlyYRyqvynQAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "100" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Plot loss and accuracy por 100 episodes, Loss is decrasing and 
accuracy is growing during the first 100 episodes \n", "episodes = list(range(0, 100))\n", "\n", "loss_Bolzman = [2.750,1.9326,1.599,1.4933,1.3641,1.3544,1.2858,1.2228,1.2695,1.165,1.2079,\n", " 1.0799,1.1265,1.242,1.2760,1.219,0.966,0.9580,1.0700,1.1492,1.1680,1.0672,\n", " 0.9353,1.0579,0.8656,1.0098,0.837,0.8859,1.0678,0.9264,0.8049,0.9255,0.8928,\n", " 0.9739,0.8459,0.8857,0.9969,0.9506,0.9345,0.960219,0.831869,1.0556, 0.8367,\n", " 0.8366,0.8056,1.006,0.8444,0.983,0.9342,0.8919,0.8765,0.8173,0.8173,0.8597,\n", " 0.877,0.8935,0.8714,0.8827,0.9247,0.9506,1.0173,0.8194,0.9933,0.8126,0.8047,\n", " 0.9586,0.8423,0.969,0.9432,0.8538,0.9458,0.8530,0.8371,0.9035,0.7926,0.8675,\n", " 0.8354,0.8754, 0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,\n", " 0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,0.7918,0.9274,\n", " 0.7951, 0.7180]\n", "\n", "plt.plot(episodes, loss_Bolzman, 'r--')\n", "plt.axis([0, 110, 0, 4])\n", "plt.show()\n", "len(loss_Bolzman)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Testing for 10 episodes ...\n", "Episode 1: reward: 210.000, steps: 488\n", "Episode 2: reward: 210.000, steps: 502\n", "Episode 3: reward: 210.000, steps: 490\n", "Episode 4: reward: 210.000, steps: 499\n", "Episode 5: reward: 210.000, steps: 499\n", "Episode 6: reward: 210.000, steps: 507\n", "Episode 7: reward: 210.000, steps: 502\n", "Episode 8: reward: 210.000, steps: 508\n", "Episode 9: reward: 210.000, steps: 498\n", "Episode 10: reward: 210.000, steps: 490\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Evaluate the algorithm for 10 episodes \n", "dqn.test(env, nb_episodes=10, visualize=True)\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "## Save the model \n", "dqn.save_weights('dqn_{}_weights.h5f'.format(env), 
overwrite=True)\n" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training for 20000 steps ...\n", " 503/20000: episode: 1, duration: 6.395s, episode steps: 503, steps per second: 79, episode reward: 320.000, mean reward: 0.636 [0.000, 10.000], mean action: 5.746 [0.000, 8.000], mean observation: 72.752 [0.000, 228.000], loss: 3.257421, mean_absolute_error: 0.159048, acc: 0.916667, mean_q: 0.257621\n", " 977/20000: episode: 2, duration: 3.642s, episode steps: 474, steps per second: 130, episode reward: 290.000, mean reward: 0.612 [0.000, 10.000], mean action: 5.719 [0.000, 8.000], mean observation: 72.752 [0.000, 228.000], loss: 3.080939, mean_absolute_error: 0.129520, acc: 0.909091, mean_q: 0.574435\n", " 1484/20000: episode: 3, duration: 3.882s, episode steps: 507, steps per second: 131, episode reward: 210.000, mean reward: 0.414 [0.000, 10.000], mean action: 5.767 [0.000, 8.000], mean observation: 72.777 [0.000, 228.000], loss: 2.102091, mean_absolute_error: 0.095685, acc: 0.901186, mean_q: 0.747267\n", " 1913/20000: episode: 4, duration: 3.272s, episode steps: 429, steps per second: 131, episode reward: 170.000, mean reward: 0.396 [0.000, 10.000], mean action: 5.823 [0.000, 8.000], mean observation: 72.835 [0.000, 228.000], loss: 1.989418, mean_absolute_error: 0.083000, acc: 0.920561, mean_q: 0.815084\n", " 2376/20000: episode: 5, duration: 3.713s, episode steps: 463, steps per second: 125, episode reward: 320.000, mean reward: 0.691 [0.000, 10.000], mean action: 5.847 [0.000, 8.000], mean observation: 72.747 [0.000, 228.000], loss: 3.513357, mean_absolute_error: 0.113861, acc: 0.911255, mean_q: 0.871983\n", " 3074/20000: episode: 6, duration: 5.392s, episode steps: 698, steps per second: 129, episode reward: 210.000, mean reward: 0.301 [0.000, 50.000], mean action: 5.785 [0.000, 8.000], mean observation: 72.815 [0.000, 228.000], loss: 3.051678, mean_absolute_error: 
0.070624, acc: 0.901004, mean_q: 0.918323\n", " 3542/20000: episode: 7, duration: 3.588s, episode steps: 468, steps per second: 130, episode reward: 210.000, mean reward: 0.449 [0.000, 10.000], mean action: 5.818 [0.000, 8.000], mean observation: 72.801 [0.000, 228.000], loss: 2.255794, mean_absolute_error: 0.077195, acc: 0.925054, mean_q: 0.932350\n", " 4033/20000: episode: 8, duration: 3.789s, episode steps: 491, steps per second: 130, episode reward: 310.000, mean reward: 0.631 [0.000, 10.000], mean action: 5.782 [0.000, 8.000], mean observation: 72.778 [0.000, 228.000], loss: 3.197656, mean_absolute_error: 0.104132, acc: 0.902041, mean_q: 0.952392\n", " 4541/20000: episode: 9, duration: 3.851s, episode steps: 508, steps per second: 132, episode reward: 210.000, mean reward: 0.413 [0.000, 10.000], mean action: 5.766 [0.000, 8.000], mean observation: 72.755 [0.000, 228.000], loss: 2.144341, mean_absolute_error: 0.076660, acc: 0.915187, mean_q: 0.968539\n", " 4961/20000: episode: 10, duration: 3.187s, episode steps: 420, steps per second: 132, episode reward: 180.000, mean reward: 0.429 [0.000, 10.000], mean action: 5.757 [0.000, 8.000], mean observation: 72.831 [0.000, 228.000], loss: 2.168210, mean_absolute_error: 0.075031, acc: 0.918854, mean_q: 0.974386\n", " 5466/20000: episode: 11, duration: 3.914s, episode steps: 505, steps per second: 129, episode reward: 210.000, mean reward: 0.416 [0.000, 10.000], mean action: 5.774 [0.000, 8.000], mean observation: 72.774 [0.000, 228.000], loss: 2.134463, mean_absolute_error: 0.074312, acc: 0.916667, mean_q: 0.979671\n", " 6333/20000: episode: 12, duration: 6.608s, episode steps: 867, steps per second: 131, episode reward: 670.000, mean reward: 0.773 [0.000, 10.000], mean action: 5.800 [0.000, 8.000], mean observation: 72.280 [0.000, 228.000], loss: 3.926031, mean_absolute_error: 0.117255, acc: 0.899538, mean_q: 0.987075\n", " 7041/20000: episode: 13, duration: 5.367s, episode steps: 708, steps per second: 132, episode 
reward: 250.000, mean reward: 0.353 [0.000, 50.000], mean action: 5.816 [0.000, 8.000], mean observation: 72.741 [0.000, 228.000], loss: 3.204065, mean_absolute_error: 0.063446, acc: 0.923621, mean_q: 0.991700\n", " 7496/20000: episode: 14, duration: 3.446s, episode steps: 455, steps per second: 132, episode reward: 250.000, mean reward: 0.549 [0.000, 10.000], mean action: 5.791 [0.000, 8.000], mean observation: 72.812 [0.000, 228.000], loss: 2.789666, mean_absolute_error: 0.090360, acc: 0.907489, mean_q: 0.992939\n", " 8362/20000: episode: 15, duration: 7.989s, episode steps: 866, steps per second: 108, episode reward: 1050.000, mean reward: 1.212 [0.000, 400.000], mean action: 5.770 [0.000, 8.000], mean observation: 72.504 [0.000, 228.000], loss: 119.384644, mean_absolute_error: 0.161700, acc: 0.912139, mean_q: 0.995352\n", " 9086/20000: episode: 16, duration: 12.080s, episode steps: 724, steps per second: 60, episode reward: 540.000, mean reward: 0.746 [0.000, 200.000], mean action: 5.812 [0.000, 8.000], mean observation: 72.771 [0.000, 228.000], loss: 31.570024, mean_absolute_error: 0.113137, acc: 0.900415, mean_q: 0.997271\n", " 9673/20000: episode: 17, duration: 9.784s, episode steps: 587, steps per second: 60, episode reward: 400.000, mean reward: 0.681 [0.000, 10.000], mean action: 5.796 [0.000, 8.000], mean observation: 72.593 [0.000, 228.000], loss: 3.502021, mean_absolute_error: 0.107221, acc: 0.899317, mean_q: 0.997683\n", " 10166/20000: episode: 18, duration: 8.228s, episode steps: 493, steps per second: 60, episode reward: 370.000, mean reward: 0.751 [0.000, 10.000], mean action: 5.765 [0.000, 8.000], mean observation: 72.652 [0.000, 228.000], loss: 3.832467, mean_absolute_error: 0.111283, acc: 0.908537, mean_q: 0.998012\n", " 10654/20000: episode: 19, duration: 6.370s, episode steps: 488, steps per second: 77, episode reward: 320.000, mean reward: 0.656 [0.000, 10.000], mean action: 5.814 [0.000, 8.000], mean observation: 72.756 [0.000, 228.000], 
loss: 3.349404, mean_absolute_error: 0.096947, acc: 0.921971, mean_q: 0.998284\n", " 11809/20000: episode: 20, duration: 8.742s, episode steps: 1155, steps per second: 132, episode reward: 790.000, mean reward: 0.684 [0.000, 200.000], mean action: 5.813 [0.000, 8.000], mean observation: 72.312 [0.000, 228.000], loss: 21.521218, mean_absolute_error: 0.103877, acc: 0.908146, mean_q: 0.998769\n", " 12328/20000: episode: 21, duration: 4.060s, episode steps: 519, steps per second: 128, episode reward: 410.000, mean reward: 0.790 [0.000, 10.000], mean action: 5.817 [0.000, 8.000], mean observation: 72.713 [0.000, 228.000], loss: 4.036854, mean_absolute_error: 0.112332, acc: 0.920849, mean_q: 0.999011\n", " 12791/20000: episode: 22, duration: 3.509s, episode steps: 463, steps per second: 132, episode reward: 300.000, mean reward: 0.648 [0.000, 10.000], mean action: 5.784 [0.000, 8.000], mean observation: 72.779 [0.000, 228.000], loss: 3.292709, mean_absolute_error: 0.097944, acc: 0.917749, mean_q: 0.999212\n", " 13218/20000: episode: 23, duration: 3.230s, episode steps: 427, steps per second: 132, episode reward: 180.000, mean reward: 0.422 [0.000, 10.000], mean action: 5.770 [0.000, 8.000], mean observation: 72.836 [0.000, 228.000], loss: 2.161153, mean_absolute_error: 0.079270, acc: 0.894366, mean_q: 0.999332\n", " 13693/20000: episode: 24, duration: 3.610s, episode steps: 475, steps per second: 132, episode reward: 210.000, mean reward: 0.442 [0.000, 10.000], mean action: 5.743 [0.000, 8.000], mean observation: 72.797 [0.000, 228.000], loss: 2.205549, mean_absolute_error: 0.075120, acc: 0.913502, mean_q: 0.999399\n", " 14148/20000: episode: 25, duration: 3.446s, episode steps: 455, steps per second: 132, episode reward: 220.000, mean reward: 0.484 [0.000, 10.000], mean action: 5.820 [0.000, 8.000], mean observation: 72.789 [0.000, 228.000], loss: 2.478004, mean_absolute_error: 0.082266, acc: 0.909692, mean_q: 0.999479\n", " 14660/20000: episode: 26, duration: 3.895s, 
episode steps: 512, steps per second: 131, episode reward: 210.000, mean reward: 0.410 [0.000, 10.000], mean action: 5.812 [0.000, 8.000], mean observation: 72.770 [0.000, 228.000], loss: 2.106088, mean_absolute_error: 0.071048, acc: 0.919765, mean_q: 0.999558\n", " 15246/20000: episode: 27, duration: 4.474s, episode steps: 586, steps per second: 131, episode reward: 250.000, mean reward: 0.427 [0.000, 50.000], mean action: 5.850 [0.000, 8.000], mean observation: 72.829 [0.000, 228.000], loss: 4.014621, mean_absolute_error: 0.077885, acc: 0.904274, mean_q: 0.999617\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 15735/20000: episode: 28, duration: 3.696s, episode steps: 489, steps per second: 132, episode reward: 310.000, mean reward: 0.634 [0.000, 10.000], mean action: 5.771 [0.000, 8.000], mean observation: 72.758 [0.000, 228.000], loss: 3.248598, mean_absolute_error: 0.097741, acc: 0.905738, mean_q: 0.999672\n", " 16228/20000: episode: 29, duration: 3.708s, episode steps: 493, steps per second: 133, episode reward: 350.000, mean reward: 0.710 [0.000, 10.000], mean action: 5.805 [0.000, 8.000], mean observation: 72.750 [0.000, 228.000], loss: 3.624651, mean_absolute_error: 0.110654, acc: 0.898374, mean_q: 0.999747\n", " 16851/20000: episode: 30, duration: 4.736s, episode steps: 623, steps per second: 132, episode reward: 460.000, mean reward: 0.738 [0.000, 10.000], mean action: 5.846 [0.000, 8.000], mean observation: 72.560 [0.000, 228.000], loss: 3.761692, mean_absolute_error: 0.106135, acc: 0.922830, mean_q: 0.999828\n", " 17532/20000: episode: 31, duration: 5.188s, episode steps: 681, steps per second: 131, episode reward: 670.000, mean reward: 0.984 [0.000, 100.000], mean action: 5.808 [0.000, 8.000], mean observation: 72.472 [0.000, 228.000], loss: 11.580458, mean_absolute_error: 0.134679, acc: 0.917647, mean_q: 0.999885\n", " 18156/20000: episode: 32, duration: 4.759s, episode steps: 624, steps per second: 131, episode reward: 420.000, mean 
reward: 0.673 [0.000, 10.000], mean action: 5.845 [0.000, 8.000], mean observation: 72.688 [0.000, 228.000], loss: 3.403344, mean_absolute_error: 0.098658, acc: 0.921348, mean_q: 0.999923\n", " 18622/20000: episode: 33, duration: 3.539s, episode steps: 466, steps per second: 132, episode reward: 350.000, mean reward: 0.751 [0.000, 10.000], mean action: 5.828 [0.000, 8.000], mean observation: 72.751 [0.000, 228.000], loss: 3.878806, mean_absolute_error: 0.111795, acc: 0.911828, mean_q: 0.999939\n", " 19415/20000: episode: 34, duration: 6.008s, episode steps: 793, steps per second: 132, episode reward: 370.000, mean reward: 0.467 [0.000, 10.000], mean action: 5.782 [0.000, 8.000], mean observation: 72.591 [0.000, 228.000], loss: 2.458734, mean_absolute_error: 0.079877, acc: 0.909091, mean_q: 0.999953\n", " 19863/20000: episode: 35, duration: 3.390s, episode steps: 448, steps per second: 132, episode reward: 300.000, mean reward: 0.670 [0.000, 10.000], mean action: 5.824 [0.000, 8.000], mean observation: 72.778 [0.000, 228.000], loss: 3.323577, mean_absolute_error: 0.102114, acc: 0.906040, mean_q: 0.999960\n", "done, took 182.506 seconds\n", "Testing for 10 episodes ...\n", "Episode 1: reward: 210.000, steps: 505\n", "Episode 2: reward: 210.000, steps: 509\n", "Episode 3: reward: 210.000, steps: 495\n", "Episode 4: reward: 210.000, steps: 503\n", "Episode 5: reward: 210.000, steps: 499\n", "Episode 6: reward: 210.000, steps: 514\n", "Episode 7: reward: 210.000, steps: 505\n", "Episode 8: reward: 210.000, steps: 499\n", "Episode 9: reward: 210.000, steps: 499\n", "Episode 10: reward: 210.000, steps: 508\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#SARSA Agent -- Reinforcement Learning \n", "from rl.agents.sarsa import SARSAAgent\n", "sarsa = SARSAAgent(model, nb_actions, \n", " policy=None, test_policy=None, \n", " gamma=0.99, nb_steps_warmup=10, \n", " train_interval=1)\n", 
"sarsa.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])\n", "sarsa.fit(env, nb_steps=20000, visualize=True, verbose=2)\n", "sarsa.test(env, nb_episodes=10, visualize=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "loss_Bolzman = [3.257,3.080,2.102,1.]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }