{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# dqn / Sarsa PacMan gym comparisson " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import gym\n", "import numpy as np \n", "\n", "from keras.models import Sequential\n", "from keras.layers import Dense, Activation, Flatten\n", "from keras.optimizers import Adam\n", "import matplotlib.pyplot as plt\n", "\n", "from rl.agents.dqn import DQNAgent\n", "from rl.agents.ddpg import DDPGAgent\n", "from rl.policy import BoltzmannGumbelQPolicy , LinearAnnealedPolicy , EpsGreedyQPolicy\n", "from rl.memory import SequentialMemory" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "env = gym.make('MsPacman-v0')\n", "nb_actions = env.action_space.n\n", "nb_actions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Neural Network Model \n", "agents representation of the environment" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "flatten_1 (Flatten) (None, 100800) 0 \n", "_________________________________________________________________\n", "dense_1 (Dense) (None, 3) 302403 \n", "_________________________________________________________________\n", "dense_2 (Dense) (None, 9) 36 \n", "_________________________________________________________________\n", "dense_3 (Dense) (None, 6) 60 \n", "_________________________________________________________________\n", "dense_4 (Dense) (None, 9) 63 \n", "_________________________________________________________________\n", "dense_5 (Dense) (None, 6) 60 \n", "_________________________________________________________________\n", "dense_6 (Dense) (None, 9) 63 \n", "_________________________________________________________________\n", "dense_7 (Dense) (None, 6) 60 \n", "_________________________________________________________________\n", "dense_8 (Dense) (None, 9) 63 \n", "_________________________________________________________________\n", "dense_9 (Dense) (None, 3) 30 \n", "_________________________________________________________________\n", "dense_10 (Dense) (None, 9) 36 \n", "_________________________________________________________________\n", "dense_11 (Dense) (None, 3) 30 \n", "_________________________________________________________________\n", "dense_12 (Dense) (None, 9) 36 \n", "_________________________________________________________________\n", "activation_1 (Activation) (None, 9) 0 \n", "=================================================================\n", "Total params: 302,940\n", "Trainable params: 302,940\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "None\n" ] } ], "source": [ "# Next, we build a neural network model\n", "model = Sequential()\n", "model.add(Flatten(input_shape=(1,) + env.observation_space.shape))\n", "model.add(Dense(3, activation= 'tanh')) # layer 1: 3 cells with tanh activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Dense(6, activation= 'sigmoid')) #layer 2 : 6 cells with sigmoid activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Dense(6, activation= 'sigmoid')) #layer 3 : 6 cells with sigmoid activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Dense(6, activation= 'sigmoid')) #layer 4 : 6 cells with sigmoid activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Dense(3, activation= 'tanh')) #layer 5 : 3 cells with tanh activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Dense(3, activation= 'sigmoid')) #layer 6 : 6 cells with sigmoid activation function \n", "model.add(Dense(nb_actions))\n", "model.add(Activation('softmax')) # one layer of 1 unit with sigmoid activation function\n", "print(model.summary())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# DQN Deep Reinforcement Learning " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#Configure and compile the agent & policy. \n", "memory = SequentialMemory(limit=100000, window_length=1)\n", "policy = BoltzmannGumbelQPolicy()\n", "dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,\n", " target_model_update=1e-2, policy=policy)\n", "dqn.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training for 100000 steps ...\n", " 922/100000: episode: 1, duration: 29.546s, episode steps: 922, steps per second: 31, episode reward: 370.000, mean reward: 0.401 [0.000, 10.000], mean action: 3.359 [0.000, 8.000], mean observation: 72.595 [0.000, 228.000], loss: 2.750501, mean_absolute_error: 0.159018, acc: 0.409802, mean_q: 0.655422\n", " 1355/100000: episode: 2, duration: 12.858s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.009 [1.000, 6.000], mean observation: 72.910 [0.000, 228.000], loss: 1.932692, mean_absolute_error: 0.100334, acc: 0.736865, mean_q: 0.939665\n", " 1794/100000: episode: 3, duration: 12.995s, episode steps: 439, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.030 [0.000, 7.000], mean observation: 72.906 [0.000, 228.000], loss: 1.599364, mean_absolute_error: 0.077912, acc: 0.806948, mean_q: 0.964618\n", " 2230/100000: episode: 4, duration: 13.087s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.018 [0.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 1.493305, mean_absolute_error: 0.067161, acc: 0.844252, mean_q: 0.977191\n", " 2695/100000: episode: 5, duration: 19.184s, episode steps: 465, steps per second: 24, episode reward: 110.000, mean reward: 0.237 [0.000, 10.000], mean action: 3.015 [0.000, 8.000], mean observation: 72.873 [0.000, 228.000], loss: 1.364146, mean_absolute_error: 0.060264, acc: 0.862769, mean_q: 0.984469\n", " 3244/100000: episode: 6, duration: 17.063s, episode steps: 549, steps per second: 32, episode reward: 60.000, mean reward: 0.109 [0.000, 10.000], mean action: 3.002 [0.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 1.354492, mean_absolute_error: 0.054776, acc: 0.885986, mean_q: 0.989604\n", " 3764/100000: episode: 7, duration: 16.936s, episode steps: 520, steps per second: 31, episode reward: 110.000, mean reward: 0.212 [0.000, 10.000], mean action: 3.033 [2.000, 8.000], mean observation: 72.857 [0.000, 228.000], loss: 1.285808, mean_absolute_error: 0.049860, acc: 0.902103, mean_q: 0.992924\n", " 4286/100000: episode: 8, duration: 16.436s, episode steps: 522, steps per second: 32, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.021 [0.000, 8.000], mean observation: 72.854 [0.000, 228.000], loss: 1.222823, mean_absolute_error: 0.046015, acc: 0.913374, mean_q: 0.995000\n", " 4818/100000: episode: 9, duration: 16.453s, episode steps: 532, steps per second: 32, episode reward: 110.000, mean reward: 0.207 [0.000, 10.000], mean action: 2.994 [0.000, 6.000], mean observation: 72.854 [0.000, 228.000], loss: 1.269515, mean_absolute_error: 0.045624, acc: 0.919290, mean_q: 0.996452\n", " 5240/100000: episode: 10, duration: 13.077s, episode steps: 422, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.012 [2.000, 8.000], mean observation: 72.910 [0.000, 228.000], loss: 1.165863, mean_absolute_error: 0.041262, acc: 0.929354, mean_q: 0.997315\n", " 5660/100000: episode: 11, duration: 12.973s, episode steps: 420, steps per second: 32, episode reward: 60.000, mean reward: 0.143 [0.000, 10.000], mean action: 3.007 [0.000, 8.000], mean observation: 72.908 [0.000, 228.000], loss: 1.207981, mean_absolute_error: 0.041799, acc: 0.931696, mean_q: 0.997878\n", " 6188/100000: episode: 12, duration: 17.165s, episode steps: 528, steps per second: 31, episode reward: 110.000, mean reward: 0.208 [0.000, 10.000], mean action: 3.019 [0.000, 8.000], mean observation: 72.858 [0.000, 228.000], loss: 1.079953, mean_absolute_error: 0.037732, acc: 0.937855, mean_q: 0.998365\n", " 7318/100000: episode: 13, duration: 36.109s, episode steps: 1130, steps per second: 31, episode reward: 180.000, mean reward: 0.159 [0.000, 50.000], mean action: 3.014 [1.000, 8.000], mean observation: 72.859 [0.000, 228.000], loss: 1.126516, mean_absolute_error: 0.036324, acc: 0.942893, mean_q: 0.998921\n", " 7749/100000: episode: 14, duration: 13.283s, episode steps: 431, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.021 [3.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 1.242681, mean_absolute_error: 0.034535, acc: 0.947288, mean_q: 0.999283\n", " 8193/100000: episode: 15, duration: 13.465s, episode steps: 444, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 1.276073, mean_absolute_error: 0.034827, acc: 0.949747, mean_q: 0.999426\n", " 8626/100000: episode: 16, duration: 13.100s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.912 [0.000, 228.000], loss: 1.219086, mean_absolute_error: 0.032687, acc: 0.953161, mean_q: 0.999536\n", " 9058/100000: episode: 17, duration: 13.008s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.014 [1.000, 8.000], mean observation: 72.917 [0.000, 228.000], loss: 0.966979, mean_absolute_error: 0.031524, acc: 0.955295, mean_q: 0.999627\n", " 9568/100000: episode: 18, duration: 15.549s, episode steps: 510, steps per second: 33, episode reward: 110.000, mean reward: 0.216 [0.000, 10.000], mean action: 2.994 [1.000, 4.000], mean observation: 72.859 [0.000, 228.000], loss: 0.906498, mean_absolute_error: 0.029293, acc: 0.959559, mean_q: 0.999698\n", " 9996/100000: episode: 19, duration: 13.345s, episode steps: 428, steps per second: 32, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.993 [0.000, 5.000], mean observation: 72.908 [0.000, 228.000], loss: 0.958007, mean_absolute_error: 0.029338, acc: 0.957579, mean_q: 0.999756\n", " 10419/100000: episode: 20, duration: 13.323s, episode steps: 423, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.005 [0.000, 8.000], mean observation: 72.906 [0.000, 228.000], loss: 1.070048, mean_absolute_error: 0.031253, acc: 0.959146, mean_q: 0.999802\n", " 10851/100000: episode: 21, duration: 13.485s, episode steps: 432, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.002 [2.000, 5.000], mean observation: 72.909 [0.000, 228.000], loss: 1.149229, mean_absolute_error: 0.031137, acc: 0.961010, mean_q: 0.999839\n", " 11429/100000: episode: 22, duration: 17.292s, episode steps: 578, steps per second: 33, episode reward: 110.000, mean reward: 0.190 [0.000, 10.000], mean action: 3.012 [0.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 1.168064, mean_absolute_error: 0.029637, acc: 0.962695, mean_q: 0.999876\n", " 11944/100000: episode: 23, duration: 15.214s, episode steps: 515, steps per second: 34, episode reward: 110.000, mean reward: 0.214 [0.000, 10.000], mean action: 2.992 [0.000, 4.000], mean observation: 72.853 [0.000, 228.000], loss: 1.067216, mean_absolute_error: 0.030874, acc: 0.961650, mean_q: 0.999906\n", " 12384/100000: episode: 24, duration: 13.070s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.935310, mean_absolute_error: 0.026834, acc: 0.966264, mean_q: 0.999924\n", " 12965/100000: episode: 25, duration: 17.462s, episode steps: 581, steps per second: 33, episode reward: 70.000, mean reward: 0.120 [0.000, 10.000], mean action: 3.014 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 1.057993, mean_absolute_error: 0.028709, acc: 0.966652, mean_q: 0.999940\n", " 13391/100000: episode: 26, duration: 12.881s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.865644, mean_absolute_error: 0.026880, acc: 0.966623, mean_q: 0.999953\n", " 13919/100000: episode: 27, duration: 15.738s, episode steps: 528, steps per second: 34, episode reward: 110.000, mean reward: 0.208 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.854 [0.000, 228.000], loss: 1.009845, mean_absolute_error: 0.028659, acc: 0.966974, mean_q: 0.999963\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 14350/100000: episode: 28, duration: 12.788s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 0.837868, mean_absolute_error: 0.025444, acc: 0.970780, mean_q: 0.999970\n", " 14775/100000: episode: 29, duration: 12.599s, episode steps: 425, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.012 [3.000, 6.000], mean observation: 72.913 [0.000, 228.000], loss: 0.885915, mean_absolute_error: 0.027345, acc: 0.966765, mean_q: 0.999976\n", " 15202/100000: episode: 30, duration: 12.630s, episode steps: 427, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [0.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 1.067898, mean_absolute_error: 0.027376, acc: 0.970653, mean_q: 0.999980\n", " 15628/100000: episode: 31, duration: 12.637s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.991 [1.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 0.926451, mean_absolute_error: 0.027283, acc: 0.971097, mean_q: 0.999984\n", " 16057/100000: episode: 32, duration: 12.722s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.804952, mean_absolute_error: 0.024675, acc: 0.971372, mean_q: 0.999987\n", " 16588/100000: episode: 33, duration: 15.744s, episode steps: 531, steps per second: 34, episode reward: 110.000, mean reward: 0.207 [0.000, 10.000], mean action: 3.013 [2.000, 8.000], mean observation: 72.854 [0.000, 228.000], loss: 0.925594, mean_absolute_error: 0.027047, acc: 0.972575, mean_q: 0.999990\n", " 17105/100000: episode: 34, duration: 15.288s, episode steps: 517, steps per second: 34, episode reward: 60.000, mean reward: 0.116 [0.000, 10.000], mean action: 3.008 [3.000, 5.000], mean observation: 72.920 [0.000, 228.000], loss: 0.892844, mean_absolute_error: 0.025035, acc: 0.971893, mean_q: 0.999992\n", " 17536/100000: episode: 35, duration: 12.722s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.023 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.973951, mean_absolute_error: 0.025876, acc: 0.975348, mean_q: 0.999994\n", " 17974/100000: episode: 36, duration: 13.189s, episode steps: 438, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.910 [0.000, 228.000], loss: 0.845903, mean_absolute_error: 0.023325, acc: 0.974244, mean_q: 0.999995\n", " 18405/100000: episode: 37, duration: 12.907s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.005 [0.000, 8.000], mean observation: 72.917 [0.000, 228.000], loss: 0.885737, mean_absolute_error: 0.025633, acc: 0.974985, mean_q: 0.999996\n", " 18834/100000: episode: 38, duration: 12.750s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.005 [2.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.966934, mean_absolute_error: 0.028101, acc: 0.971882, mean_q: 0.999997\n", " 19299/100000: episode: 39, duration: 13.883s, episode steps: 465, steps per second: 33, episode reward: 110.000, mean reward: 0.237 [0.000, 10.000], mean action: 3.004 [1.000, 6.000], mean observation: 72.894 [0.000, 228.000], loss: 0.950698, mean_absolute_error: 0.025855, acc: 0.973790, mean_q: 0.999997\n", " 19725/100000: episode: 40, duration: 12.708s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.016 [3.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 0.934569, mean_absolute_error: 0.025027, acc: 0.975719, mean_q: 0.999998\n", " 20161/100000: episode: 41, duration: 12.911s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.966219, mean_absolute_error: 0.025600, acc: 0.975989, mean_q: 0.999998\n", " 20590/100000: episode: 42, duration: 12.721s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.912 [0.000, 228.000], loss: 0.831869, mean_absolute_error: 0.024158, acc: 0.976034, mean_q: 0.999999\n", " 21022/100000: episode: 43, duration: 12.844s, episode steps: 432, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.021 [3.000, 8.000], mean observation: 72.912 [0.000, 228.000], loss: 1.055637, mean_absolute_error: 0.027234, acc: 0.977937, mean_q: 0.999999\n", " 21455/100000: episode: 44, duration: 12.819s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.021 [3.000, 7.000], mean observation: 72.909 [0.000, 228.000], loss: 0.836770, mean_absolute_error: 0.023797, acc: 0.978493, mean_q: 0.999999\n", " 21889/100000: episode: 45, duration: 12.940s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.912 [0.000, 228.000], loss: 0.836639, mean_absolute_error: 0.023856, acc: 0.977823, mean_q: 0.999999\n", " 22343/100000: episode: 46, duration: 13.496s, episode steps: 454, steps per second: 34, episode reward: 110.000, mean reward: 0.242 [0.000, 10.000], mean action: 3.009 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.805641, mean_absolute_error: 0.023271, acc: 0.977905, mean_q: 0.999999\n", " 22777/100000: episode: 47, duration: 12.854s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [0.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 1.006345, mean_absolute_error: 0.026670, acc: 0.974942, mean_q: 1.000000\n", " 23214/100000: episode: 48, duration: 12.924s, episode steps: 437, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.844489, mean_absolute_error: 0.023875, acc: 0.979334, mean_q: 1.000000\n", " 23641/100000: episode: 49, duration: 12.627s, episode steps: 427, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.991 [1.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.983037, mean_absolute_error: 0.026869, acc: 0.979435, mean_q: 1.000000\n", " 24067/100000: episode: 50, duration: 12.623s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.009 [3.000, 6.000], mean observation: 72.912 [0.000, 228.000], loss: 0.934281, mean_absolute_error: 0.025823, acc: 0.979020, mean_q: 1.000000\n", " 24494/100000: episode: 51, duration: 12.634s, episode steps: 427, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.988 [0.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.891939, mean_absolute_error: 0.025250, acc: 0.977532, mean_q: 1.000000\n", " 24932/100000: episode: 52, duration: 12.988s, episode steps: 438, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.876506, mean_absolute_error: 0.024347, acc: 0.979880, mean_q: 1.000000\n", " 25362/100000: episode: 53, duration: 12.794s, episode steps: 430, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.817384, mean_absolute_error: 0.022920, acc: 0.980959, mean_q: 1.000000\n", " 25784/100000: episode: 54, duration: 12.561s, episode steps: 422, steps per second: 34, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.002 [1.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 0.859725, mean_absolute_error: 0.023460, acc: 0.982968, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 26222/100000: episode: 55, duration: 12.969s, episode steps: 438, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.877860, mean_absolute_error: 0.020972, acc: 0.981949, mean_q: 1.000000\n", " 26654/100000: episode: 56, duration: 12.859s, episode steps: 432, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 0.893530, mean_absolute_error: 0.025140, acc: 0.978516, mean_q: 1.000000\n", " 27184/100000: episode: 57, duration: 15.685s, episode steps: 530, steps per second: 34, episode reward: 110.000, mean reward: 0.208 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.849 [0.000, 228.000], loss: 0.871484, mean_absolute_error: 0.024115, acc: 0.980837, mean_q: 1.000000\n", " 27647/100000: episode: 58, duration: 13.676s, episode steps: 463, steps per second: 34, episode reward: 110.000, mean reward: 0.238 [0.000, 10.000], mean action: 2.998 [1.000, 4.000], mean observation: 72.896 [0.000, 228.000], loss: 0.882798, mean_absolute_error: 0.022712, acc: 0.981776, mean_q: 1.000000\n", " 28083/100000: episode: 59, duration: 12.928s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.912 [0.000, 228.000], loss: 0.924782, mean_absolute_error: 0.024951, acc: 0.982870, mean_q: 1.000000\n", " 28515/100000: episode: 60, duration: 13.170s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [1.000, 5.000], mean observation: 72.909 [0.000, 228.000], loss: 0.950630, mean_absolute_error: 0.025596, acc: 0.982784, mean_q: 1.000000\n", " 28957/100000: episode: 61, duration: 13.095s, episode steps: 442, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 1.017386, mean_absolute_error: 0.025839, acc: 0.980345, mean_q: 1.000000\n", " 29391/100000: episode: 62, duration: 12.831s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.917 [0.000, 228.000], loss: 0.819406, mean_absolute_error: 0.023005, acc: 0.980703, mean_q: 1.000000\n", " 29824/100000: episode: 63, duration: 12.812s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 0.993344, mean_absolute_error: 0.026588, acc: 0.982318, mean_q: 1.000000\n", " 30267/100000: episode: 64, duration: 13.512s, episode steps: 443, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 2.989 [0.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.812653, mean_absolute_error: 0.022780, acc: 0.980954, mean_q: 1.000000\n", " 30707/100000: episode: 65, duration: 13.171s, episode steps: 440, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.907 [0.000, 228.000], loss: 0.804780, mean_absolute_error: 0.023003, acc: 0.979119, mean_q: 1.000000\n", " 31137/100000: episode: 66, duration: 12.918s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.958653, mean_absolute_error: 0.022369, acc: 0.983358, mean_q: 1.000000\n", " 31567/100000: episode: 67, duration: 13.041s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.842303, mean_absolute_error: 0.023308, acc: 0.981904, mean_q: 1.000000\n", " 31999/100000: episode: 68, duration: 12.833s, episode steps: 432, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.969986, mean_absolute_error: 0.024267, acc: 0.983362, mean_q: 1.000000\n", " 32434/100000: episode: 69, duration: 12.959s, episode steps: 435, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.018 [3.000, 7.000], mean observation: 72.910 [0.000, 228.000], loss: 0.943249, mean_absolute_error: 0.023929, acc: 0.981968, mean_q: 1.000000\n", " 32860/100000: episode: 70, duration: 12.618s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.005 [1.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 0.853826, mean_absolute_error: 0.022960, acc: 0.984815, mean_q: 1.000000\n", " 33301/100000: episode: 71, duration: 13.135s, episode steps: 441, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.945814, mean_absolute_error: 0.023591, acc: 0.983985, mean_q: 1.000000\n", " 33738/100000: episode: 72, duration: 12.972s, episode steps: 437, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.995 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.853012, mean_absolute_error: 0.023421, acc: 0.982194, mean_q: 1.000000\n", " 34169/100000: episode: 73, duration: 12.839s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.002 [0.000, 7.000], mean observation: 72.910 [0.000, 228.000], loss: 0.837158, mean_absolute_error: 0.022482, acc: 0.985426, mean_q: 1.000000\n", " 34595/100000: episode: 74, duration: 12.850s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.005 [2.000, 6.000], mean observation: 72.906 [0.000, 228.000], loss: 0.903527, mean_absolute_error: 0.022658, acc: 0.983715, mean_q: 1.000000\n", " 35022/100000: episode: 75, duration: 12.748s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.991 [0.000, 4.000], mean observation: 72.911 [0.000, 228.000], loss: 0.792673, mean_absolute_error: 0.021522, acc: 0.984778, mean_q: 1.000000\n", " 35448/100000: episode: 76, duration: 12.698s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [0.000, 6.000], mean observation: 72.908 [0.000, 228.000], loss: 0.867522, mean_absolute_error: 0.023318, acc: 0.983788, mean_q: 1.000000\n", " 35875/100000: episode: 77, duration: 12.782s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.835425, mean_absolute_error: 0.021034, acc: 0.984265, mean_q: 1.000000\n", " 36306/100000: episode: 78, duration: 12.901s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.007 [3.000, 5.000], mean observation: 72.917 [0.000, 228.000], loss: 0.875452, mean_absolute_error: 0.022229, acc: 0.982961, mean_q: 1.000000\n", " 36740/100000: episode: 79, duration: 12.985s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.947098, mean_absolute_error: 0.025119, acc: 0.984087, mean_q: 1.000000\n", " 37166/100000: episode: 80, duration: 12.678s, episode steps: 426, steps per second: 34, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.748542, mean_absolute_error: 0.020739, acc: 0.984522, mean_q: 1.000000\n", " 37612/100000: episode: 81, duration: 13.315s, episode steps: 446, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 2.996 [1.000, 3.000], mean observation: 72.920 [0.000, 228.000], loss: 0.752258, mean_absolute_error: 0.020643, acc: 0.984725, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 38046/100000: episode: 82, duration: 12.966s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.746647, mean_absolute_error: 0.020482, acc: 0.985383, mean_q: 1.000000\n", " 38482/100000: episode: 83, duration: 12.969s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.007 [2.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.842648, mean_absolute_error: 0.022776, acc: 0.984088, mean_q: 1.000000\n", " 38998/100000: episode: 84, duration: 15.467s, episode steps: 516, steps per second: 33, episode reward: 110.000, mean reward: 0.213 [0.000, 10.000], mean action: 3.010 [0.000, 8.000], mean observation: 72.853 [0.000, 228.000], loss: 0.791431, mean_absolute_error: 0.021996, acc: 0.982316, mean_q: 1.000000\n", " 39430/100000: episode: 85, duration: 12.932s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.770487, mean_absolute_error: 0.021347, acc: 0.983507, mean_q: 1.000000\n", " 39870/100000: episode: 86, duration: 13.094s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.912 [0.000, 228.000], loss: 0.851223, mean_absolute_error: 0.022746, acc: 0.985653, mean_q: 1.000000\n", " 40298/100000: episode: 87, duration: 12.801s, episode steps: 428, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.866558, mean_absolute_error: 0.023239, acc: 0.984959, mean_q: 1.000000\n", " 40738/100000: episode: 88, duration: 13.130s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.913 [0.000, 228.000], loss: 0.814577, mean_absolute_error: 0.021974, acc: 0.985369, mean_q: 1.000000\n", " 41180/100000: episode: 89, duration: 13.200s, episode steps: 442, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.993 [1.000, 6.000], mean observation: 72.913 [0.000, 228.000], loss: 0.791810, mean_absolute_error: 0.021386, acc: 0.985506, mean_q: 1.000000\n", " 41702/100000: episode: 90, duration: 15.572s, episode steps: 522, steps per second: 34, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.853 [0.000, 228.000], loss: 0.927495, mean_absolute_error: 0.024034, acc: 0.987428, mean_q: 1.000000\n", " 42138/100000: episode: 91, duration: 13.054s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.918 [0.000, 228.000], loss: 0.935920, mean_absolute_error: 0.023190, acc: 0.985163, mean_q: 1.000000\n", " 42568/100000: episode: 92, duration: 12.831s, episode steps: 430, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.907 [0.000, 228.000], loss: 0.749676, mean_absolute_error: 0.020661, acc: 0.984448, mean_q: 1.000000\n", " 43039/100000: episode: 93, duration: 13.984s, episode steps: 471, steps per second: 34, episode reward: 110.000, mean reward: 0.234 [0.000, 10.000], mean action: 3.011 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.832054, mean_absolute_error: 0.022190, acc: 0.985801, mean_q: 1.000000\n", " 43473/100000: episode: 94, duration: 12.932s, episode steps: 434, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.023 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.718739, mean_absolute_error: 0.019606, acc: 0.986679, mean_q: 1.000000\n", " 43904/100000: episode: 95, duration: 12.903s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.790879, mean_absolute_error: 0.020783, acc: 0.988399, mean_q: 1.000000\n", " 44329/100000: episode: 96, duration: 12.874s, episode steps: 425, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.912065, mean_absolute_error: 0.022168, acc: 0.987132, mean_q: 1.000000\n", " 44763/100000: episode: 97, duration: 13.024s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [2.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.837879, mean_absolute_error: 0.020969, acc: 0.984375, mean_q: 1.000000\n", " 45290/100000: episode: 98, duration: 15.742s, episode steps: 527, steps per second: 33, episode reward: 110.000, mean reward: 0.209 [0.000, 10.000], mean action: 3.009 [3.000, 8.000], mean observation: 72.863 [0.000, 228.000], loss: 0.899594, mean_absolute_error: 0.023656, acc: 0.986717, mean_q: 1.000000\n", " 45714/100000: episode: 99, duration: 13.066s, episode steps: 424, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.913 [0.000, 228.000], loss: 0.795141, mean_absolute_error: 0.020977, acc: 0.988060, mean_q: 1.000000\n", " 46150/100000: episode: 100, duration: 13.063s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.718090, mean_absolute_error: 0.019679, acc: 0.985737, mean_q: 1.000000\n", " 46580/100000: episode: 101, duration: 12.913s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 0.697030, mean_absolute_error: 0.019125, acc: 0.986773, mean_q: 1.000000\n", " 47013/100000: episode: 102, duration: 13.298s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.739714, mean_absolute_error: 0.019914, acc: 0.986937, mean_q: 1.000000\n", " 47448/100000: episode: 103, duration: 13.779s, episode steps: 435, steps per second: 32, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.738085, mean_absolute_error: 0.020341, acc: 0.984770, mean_q: 1.000000\n", " 47889/100000: episode: 104, duration: 13.802s, episode steps: 441, steps per second: 32, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.832484, mean_absolute_error: 0.021954, acc: 0.986820, mean_q: 1.000000\n", " 48319/100000: episode: 105, duration: 12.800s, episode steps: 430, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.798077, mean_absolute_error: 0.021110, acc: 0.987718, mean_q: 1.000000\n", " 48752/100000: episode: 106, duration: 12.868s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.888920, mean_absolute_error: 0.021588, acc: 0.987009, mean_q: 1.000000\n", " 49181/100000: episode: 107, duration: 12.792s, episode steps: 429, steps per second: 34, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.817413, mean_absolute_error: 0.021793, acc: 0.986233, mean_q: 1.000000\n", " 49614/100000: episode: 108, duration: 12.875s, episode steps: 433, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.014 [3.000, 7.000], mean observation: 72.911 [0.000, 228.000], loss: 0.804071, mean_absolute_error: 0.021243, acc: 0.987803, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 50042/100000: episode: 109, duration: 13.319s, episode steps: 428, steps per second: 32, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.796233, mean_absolute_error: 0.021130, acc: 0.987077, mean_q: 1.000000\n", " 50472/100000: episode: 110, duration: 13.205s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 6.000], mean observation: 72.909 [0.000, 228.000], loss: 0.958833, mean_absolute_error: 0.023197, acc: 0.987064, mean_q: 1.000000\n", " 51182/100000: episode: 111, duration: 21.496s, episode steps: 710, steps per second: 33, episode reward: 780.000, mean reward: 1.099 [0.000, 400.000], mean action: 3.023 [3.000, 8.000], mean observation: 72.878 [0.000, 228.000], loss: 1.750635, mean_absolute_error: 0.022866, acc: 0.987192, mean_q: 1.000000\n", " 51616/100000: episode: 112, duration: 12.981s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 2.289796, mean_absolute_error: 0.022587, acc: 0.986247, mean_q: 1.000000\n", " 52052/100000: episode: 113, duration: 13.128s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.018 [3.000, 7.000], mean observation: 72.909 [0.000, 228.000], loss: 6.869392, mean_absolute_error: 0.026987, acc: 0.988174, mean_q: 1.000000\n", " 52486/100000: episode: 114, duration: 12.987s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.911 [0.000, 228.000], loss: 0.908648, mean_absolute_error: 0.021901, acc: 0.987975, mean_q: 1.000000\n", " 53028/100000: episode: 115, duration: 16.227s, episode steps: 542, steps per second: 33, episode reward: 110.000, mean reward: 0.203 [0.000, 10.000], mean action: 3.015 [1.000, 8.000], mean observation: 72.861 [0.000, 228.000], loss: 0.927577, mean_absolute_error: 0.021164, acc: 0.988872, mean_q: 1.000000\n", " 53549/100000: episode: 116, duration: 15.605s, episode steps: 521, steps per second: 33, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.854 [0.000, 228.000], loss: 6.778060, mean_absolute_error: 0.024810, acc: 0.987224, mean_q: 1.000000\n", " 53976/100000: episode: 117, duration: 12.945s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.895132, mean_absolute_error: 0.023102, acc: 0.988876, mean_q: 1.000000\n", " 54417/100000: episode: 118, duration: 13.326s, episode steps: 441, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 0.815021, mean_absolute_error: 0.021397, acc: 0.987883, mean_q: 1.000000\n", " 54848/100000: episode: 119, duration: 12.849s, episode steps: 431, steps per second: 34, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 6.555128, mean_absolute_error: 0.023074, acc: 0.989342, mean_q: 1.000000\n", " 55278/100000: episode: 120, duration: 12.983s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.788291, mean_absolute_error: 0.020740, acc: 0.988517, mean_q: 1.000000\n", " 55712/100000: episode: 121, duration: 13.138s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.905 [0.000, 228.000], loss: 0.701904, mean_absolute_error: 0.018827, acc: 0.988767, mean_q: 1.000000\n", " 56144/100000: episode: 122, duration: 12.976s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.016 [3.000, 8.000], mean observation: 72.912 [0.000, 228.000], loss: 6.674213, mean_absolute_error: 0.026572, acc: 0.986400, mean_q: 1.000000\n", " 56584/100000: episode: 123, duration: 13.134s, episode steps: 440, steps per second: 34, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 6.481470, mean_absolute_error: 0.024026, acc: 0.989205, mean_q: 1.000000\n", " 57010/100000: episode: 124, duration: 12.779s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 2.567588, mean_absolute_error: 0.024750, acc: 0.987089, mean_q: 1.000000\n", " 57440/100000: episode: 125, duration: 12.885s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [0.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 2.430678, mean_absolute_error: 0.023412, acc: 0.988154, mean_q: 1.000000\n", " 57878/100000: episode: 126, duration: 13.035s, episode steps: 438, steps per second: 34, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.009 [3.000, 5.000], mean observation: 72.909 [0.000, 228.000], loss: 2.234050, mean_absolute_error: 0.021098, acc: 0.989155, mean_q: 1.000000\n", " 58316/100000: episode: 127, duration: 13.131s, episode steps: 438, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 1.070799, mean_absolute_error: 0.022651, acc: 0.986658, mean_q: 1.000000\n", " 58750/100000: episode: 128, duration: 13.015s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.919 [0.000, 228.000], loss: 3.657577, mean_absolute_error: 0.023798, acc: 0.988119, mean_q: 1.000000\n", " 59176/100000: episode: 129, duration: 12.747s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 2.296508, mean_absolute_error: 0.023142, acc: 0.989583, mean_q: 1.000000\n", " 59653/100000: episode: 130, duration: 14.280s, episode steps: 477, steps per second: 33, episode reward: 110.000, mean reward: 0.231 [0.000, 10.000], mean action: 3.010 [3.000, 8.000], mean observation: 72.867 [0.000, 228.000], loss: 0.889277, mean_absolute_error: 0.021522, acc: 0.988273, mean_q: 1.000000\n", " 60084/100000: episode: 131, duration: 12.940s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.917 [0.000, 228.000], loss: 0.812250, mean_absolute_error: 0.021473, acc: 0.987602, mean_q: 1.000000\n", " 60510/100000: episode: 132, duration: 12.855s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 6.668528, mean_absolute_error: 0.024226, acc: 0.987969, mean_q: 1.000000\n", " 60951/100000: episode: 133, duration: 13.184s, episode steps: 441, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.011 [3.000, 6.000], mean observation: 72.905 [0.000, 228.000], loss: 6.516801, mean_absolute_error: 0.023675, acc: 0.988450, mean_q: 1.000000\n", " 61377/100000: episode: 134, duration: 12.783s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.774365, mean_absolute_error: 0.020130, acc: 0.990023, mean_q: 1.000000\n", " 61807/100000: episode: 135, duration: 12.900s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.012 [3.000, 6.000], mean observation: 72.918 [0.000, 228.000], loss: 0.730819, mean_absolute_error: 0.019347, acc: 0.988808, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 62241/100000: episode: 136, duration: 12.967s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.739101, mean_absolute_error: 0.019514, acc: 0.989127, mean_q: 1.000000\n", " 62688/100000: episode: 137, duration: 13.364s, episode steps: 447, steps per second: 33, episode reward: 60.000, mean reward: 0.134 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.831790, mean_absolute_error: 0.021591, acc: 0.988465, mean_q: 1.000000\n", " 63121/100000: episode: 138, duration: 13.000s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 7.999557, mean_absolute_error: 0.025455, acc: 0.988380, mean_q: 1.000000\n", " 63546/100000: episode: 139, duration: 13.515s, episode steps: 425, steps per second: 31, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.910 [0.000, 228.000], loss: 6.665014, mean_absolute_error: 0.023986, acc: 0.988235, mean_q: 1.000000\n", " 63987/100000: episode: 140, duration: 13.334s, episode steps: 441, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.847174, mean_absolute_error: 0.021910, acc: 0.988662, mean_q: 1.000000\n", " 64433/100000: episode: 141, duration: 13.484s, episode steps: 446, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.004 [3.000, 5.000], mean observation: 72.907 [0.000, 228.000], loss: 0.801544, mean_absolute_error: 0.021092, acc: 0.988299, mean_q: 1.000000\n", " 64862/100000: episode: 142, duration: 12.891s, episode steps: 429, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 6.596956, mean_absolute_error: 0.023347, acc: 0.989948, mean_q: 1.000000\n", " 65301/100000: episode: 143, duration: 13.241s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 0.756605, mean_absolute_error: 0.020061, acc: 0.988468, mean_q: 1.000000\n", " 65740/100000: episode: 144, duration: 13.186s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.014 [3.000, 8.000], mean observation: 72.913 [0.000, 228.000], loss: 0.754340, mean_absolute_error: 0.019724, acc: 0.990248, mean_q: 1.000000\n", " 66175/100000: episode: 145, duration: 13.118s, episode steps: 435, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.998 [0.000, 5.000], mean observation: 72.912 [0.000, 228.000], loss: 13.629837, mean_absolute_error: 0.026191, acc: 0.991738, mean_q: 1.000000\n", " 66613/100000: episode: 146, duration: 13.218s, episode steps: 438, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.728253, mean_absolute_error: 0.019346, acc: 0.988870, mean_q: 1.000000\n", " 67050/100000: episode: 147, duration: 13.268s, episode steps: 437, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 0.771391, mean_absolute_error: 0.020079, acc: 0.990132, mean_q: 1.000000\n", " 67487/100000: episode: 148, duration: 13.208s, episode steps: 437, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 1.055708, mean_absolute_error: 0.023504, acc: 0.988630, mean_q: 1.000000\n", " 67917/100000: episode: 149, duration: 13.018s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.904 [0.000, 228.000], loss: 0.734914, mean_absolute_error: 0.019412, acc: 0.989390, mean_q: 1.000000\n", " 68349/100000: episode: 150, duration: 13.049s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.879950, mean_absolute_error: 0.020989, acc: 0.988860, mean_q: 1.000000\n", " 68776/100000: episode: 151, duration: 12.993s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.002 [2.000, 5.000], mean observation: 72.910 [0.000, 228.000], loss: 0.810485, mean_absolute_error: 0.021195, acc: 0.988583, mean_q: 1.000000\n", " 69200/100000: episode: 152, duration: 13.099s, episode steps: 424, steps per second: 32, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 6.769201, mean_absolute_error: 0.024035, acc: 0.989608, mean_q: 1.000000\n", " 69634/100000: episode: 153, duration: 13.483s, episode steps: 434, steps per second: 32, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 6.727956, mean_absolute_error: 0.025991, acc: 0.990351, mean_q: 1.000000\n", " 70065/100000: episode: 154, duration: 13.497s, episode steps: 431, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.831990, mean_absolute_error: 0.019917, acc: 0.988907, mean_q: 1.000000\n", " 70500/100000: episode: 155, duration: 13.260s, episode steps: 435, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.903836, mean_absolute_error: 0.021813, acc: 0.987644, mean_q: 1.000000\n", " 70924/100000: episode: 156, duration: 12.851s, episode steps: 424, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.813252, mean_absolute_error: 0.020972, acc: 0.990050, mean_q: 1.000000\n", " 71351/100000: episode: 157, duration: 12.881s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.762409, mean_absolute_error: 0.019593, acc: 0.991437, mean_q: 1.000000\n", " 71898/100000: episode: 158, duration: 16.541s, episode steps: 547, steps per second: 33, episode reward: 60.000, mean reward: 0.110 [0.000, 10.000], mean action: 3.004 [3.000, 5.000], mean observation: 72.901 [0.000, 228.000], loss: 5.462224, mean_absolute_error: 0.022918, acc: 0.989431, mean_q: 1.000000\n", " 72325/100000: episode: 159, duration: 12.927s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.913 [0.000, 228.000], loss: 0.815619, mean_absolute_error: 0.020754, acc: 0.991584, mean_q: 1.000000\n", " 72749/100000: episode: 160, duration: 12.806s, episode steps: 424, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.805345, mean_absolute_error: 0.020817, acc: 0.989829, mean_q: 1.000000\n", " 73176/100000: episode: 161, duration: 12.953s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 6.730695, mean_absolute_error: 0.024181, acc: 0.989095, mean_q: 1.000000\n", " 73615/100000: episode: 162, duration: 13.336s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.837817, mean_absolute_error: 0.020025, acc: 0.989749, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 74051/100000: episode: 163, duration: 13.276s, episode steps: 436, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 6.441117, mean_absolute_error: 0.021877, acc: 0.989822, mean_q: 1.000000\n", " 74488/100000: episode: 164, duration: 13.053s, episode steps: 437, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.819825, mean_absolute_error: 0.021008, acc: 0.990346, mean_q: 1.000000\n", " 74921/100000: episode: 165, duration: 13.294s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 2.998 [0.000, 5.000], mean observation: 72.913 [0.000, 228.000], loss: 0.865574, mean_absolute_error: 0.022025, acc: 0.990257, mean_q: 1.000000\n", " 75354/100000: episode: 166, duration: 13.565s, episode steps: 433, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.708961, mean_absolute_error: 0.018218, acc: 0.992350, mean_q: 1.000000\n", " 75781/100000: episode: 167, duration: 12.828s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 8.054317, mean_absolute_error: 0.024153, acc: 0.989681, mean_q: 1.000000\n", " 76295/100000: episode: 168, duration: 15.366s, episode steps: 514, steps per second: 33, episode reward: 110.000, mean reward: 0.214 [0.000, 10.000], mean action: 3.010 [3.000, 8.000], mean observation: 72.859 [0.000, 228.000], loss: 5.656568, mean_absolute_error: 0.023127, acc: 0.990333, mean_q: 1.000000\n", " 76710/100000: episode: 169, duration: 12.637s, episode steps: 415, steps per second: 33, episode reward: 60.000, mean reward: 0.145 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.841335, mean_absolute_error: 0.019893, acc: 0.990361, mean_q: 1.000000\n", " 77142/100000: episode: 170, duration: 13.082s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.767345, mean_absolute_error: 0.017985, acc: 0.991753, mean_q: 1.000000\n", " 77592/100000: episode: 171, duration: 13.450s, episode steps: 450, steps per second: 33, episode reward: 110.000, mean reward: 0.244 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.899 [0.000, 228.000], loss: 0.689204, mean_absolute_error: 0.018016, acc: 0.991042, mean_q: 1.000000\n", " 78114/100000: episode: 172, duration: 15.622s, episode steps: 522, steps per second: 33, episode reward: 110.000, mean reward: 0.211 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.850 [0.000, 228.000], loss: 0.714887, mean_absolute_error: 0.018603, acc: 0.990960, mean_q: 1.000000\n", " 78544/100000: episode: 173, duration: 12.919s, episode steps: 430, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.737498, mean_absolute_error: 0.019308, acc: 0.989971, mean_q: 1.000000\n", " 78975/100000: episode: 174, duration: 12.894s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.918 [0.000, 228.000], loss: 0.797951, mean_absolute_error: 0.018914, acc: 0.990502, mean_q: 1.000000\n", " 79417/100000: episode: 175, duration: 13.310s, episode steps: 442, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 12.094293, mean_absolute_error: 0.026394, acc: 0.991233, mean_q: 1.000000\n", " 79849/100000: episode: 176, duration: 12.968s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.005 [3.000, 5.000], mean observation: 72.907 [0.000, 228.000], loss: 0.746510, mean_absolute_error: 0.019266, acc: 0.991247, mean_q: 1.000000\n", " 80281/100000: episode: 177, duration: 13.057s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.012 [3.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 0.878578, mean_absolute_error: 0.020813, acc: 0.990162, mean_q: 1.000000\n", " 80717/100000: episode: 178, duration: 12.996s, episode steps: 436, steps per second: 34, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.914 [0.000, 228.000], loss: 0.747418, mean_absolute_error: 0.017835, acc: 0.990396, mean_q: 1.000000\n", " 81150/100000: episode: 179, duration: 13.030s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 2.231029, mean_absolute_error: 0.021677, acc: 0.991700, mean_q: 1.000000\n", " 81577/100000: episode: 180, duration: 12.772s, episode steps: 427, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.911 [0.000, 228.000], loss: 0.783079, mean_absolute_error: 0.020112, acc: 0.990925, mean_q: 1.000000\n", " 82008/100000: episode: 181, duration: 12.885s, episode steps: 431, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.696581, mean_absolute_error: 0.018047, acc: 0.991734, mean_q: 1.000000\n", " 82440/100000: episode: 182, duration: 13.183s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.836997, mean_absolute_error: 0.019772, acc: 0.991030, mean_q: 1.000000\n", " 82871/100000: episode: 183, duration: 13.619s, episode steps: 431, steps per second: 32, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.916 [0.000, 228.000], loss: 13.865776, mean_absolute_error: 0.028900, acc: 0.991372, mean_q: 1.000000\n", " 83293/100000: episode: 184, duration: 12.943s, episode steps: 422, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 2.995 [0.000, 4.000], mean observation: 72.904 [0.000, 228.000], loss: 0.801700, mean_absolute_error: 0.020705, acc: 0.989929, mean_q: 1.000000\n", " 83729/100000: episode: 185, duration: 13.499s, episode steps: 436, steps per second: 32, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.009 [3.000, 7.000], mean observation: 72.912 [0.000, 228.000], loss: 6.508937, mean_absolute_error: 0.023269, acc: 0.990181, mean_q: 1.000000\n", " 84173/100000: episode: 186, duration: 13.531s, episode steps: 444, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.848594, mean_absolute_error: 0.019877, acc: 0.991695, mean_q: 1.000000\n", " 84615/100000: episode: 187, duration: 13.393s, episode steps: 442, steps per second: 33, episode reward: 60.000, mean reward: 0.136 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.853347, mean_absolute_error: 0.021670, acc: 0.990950, mean_q: 1.000000\n", " 85049/100000: episode: 188, duration: 13.252s, episode steps: 434, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 6.567101, mean_absolute_error: 0.023859, acc: 0.991215, mean_q: 1.000000\n", " 85488/100000: episode: 189, duration: 13.414s, episode steps: 439, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.911 [0.000, 228.000], loss: 12.108674, mean_absolute_error: 0.024965, acc: 0.991315, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 85917/100000: episode: 190, duration: 13.049s, episode steps: 429, steps per second: 33, episode reward: 60.000, mean reward: 0.140 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.909 [0.000, 228.000], loss: 0.760346, mean_absolute_error: 0.019699, acc: 0.989948, mean_q: 1.000000\n", " 86352/100000: episode: 191, duration: 13.272s, episode steps: 435, steps per second: 33, episode reward: 60.000, mean reward: 0.138 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.912 [0.000, 228.000], loss: 0.691576, mean_absolute_error: 0.017892, acc: 0.991882, mean_q: 1.000000\n", " 86777/100000: episode: 192, duration: 12.884s, episode steps: 425, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 2.998 [2.000, 3.000], mean observation: 72.913 [0.000, 228.000], loss: 0.688200, mean_absolute_error: 0.018250, acc: 0.989853, mean_q: 1.000000\n", " 87199/100000: episode: 193, duration: 12.817s, episode steps: 422, steps per second: 33, episode reward: 60.000, mean reward: 0.142 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.910 [0.000, 228.000], loss: 0.798901, mean_absolute_error: 0.020241, acc: 0.992595, mean_q: 1.000000\n", " 87631/100000: episode: 194, duration: 13.218s, episode steps: 432, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.906 [0.000, 228.000], loss: 0.758069, mean_absolute_error: 0.017800, acc: 0.991536, mean_q: 1.000000\n", " 88076/100000: episode: 195, duration: 13.552s, episode steps: 445, steps per second: 33, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.002 [3.000, 4.000], mean observation: 72.907 [0.000, 228.000], loss: 0.755982, mean_absolute_error: 0.019846, acc: 0.989045, mean_q: 1.000000\n", " 88513/100000: episode: 196, duration: 13.312s, episode steps: 437, steps per second: 33, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.915 [0.000, 228.000], loss: 0.821040, mean_absolute_error: 0.020977, acc: 0.990775, mean_q: 1.000000\n", " 88975/100000: episode: 197, duration: 14.064s, episode steps: 462, steps per second: 33, episode reward: 110.000, mean reward: 0.238 [0.000, 10.000], mean action: 3.000 [2.000, 4.000], mean observation: 72.892 [0.000, 228.000], loss: 0.816265, mean_absolute_error: 0.019451, acc: 0.990530, mean_q: 1.000000\n", " 89401/100000: episode: 198, duration: 12.976s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.007 [3.000, 6.000], mean observation: 72.912 [0.000, 228.000], loss: 0.783816, mean_absolute_error: 0.020237, acc: 0.990244, mean_q: 1.000000\n", " 89827/100000: episode: 199, duration: 12.980s, episode steps: 426, steps per second: 33, episode reward: 60.000, mean reward: 0.141 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.907 [0.000, 228.000], loss: 3.702346, mean_absolute_error: 0.023155, acc: 0.990390, mean_q: 1.000000\n", " 90270/100000: episode: 200, duration: 13.735s, episode steps: 443, steps per second: 32, episode reward: 60.000, mean reward: 0.135 [0.000, 10.000], mean action: 3.020 [3.000, 7.000], mean observation: 72.914 [0.000, 228.000], loss: 6.430346, mean_absolute_error: 0.023296, acc: 0.991112, mean_q: 1.000000\n", " 90691/100000: episode: 201, duration: 12.984s, episode steps: 421, steps per second: 32, episode reward: 60.000, mean reward: 0.143 [0.000, 10.000], mean action: 2.995 [1.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 6.711028, mean_absolute_error: 0.023351, acc: 0.990202, mean_q: 1.000000\n", " 91130/100000: episode: 202, duration: 14.014s, episode steps: 439, steps per second: 31, episode reward: 60.000, mean reward: 0.137 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 0.857046, mean_absolute_error: 0.020169, acc: 0.991173, mean_q: 1.000000\n", " 91578/100000: episode: 203, duration: 13.709s, episode steps: 448, steps per second: 33, episode reward: 60.000, mean reward: 0.134 [0.000, 10.000], mean action: 3.000 [3.000, 3.000], mean observation: 72.908 [0.000, 228.000], loss: 7.718023, mean_absolute_error: 0.023832, acc: 0.991211, mean_q: 1.000000\n", " 92011/100000: episode: 204, duration: 13.224s, episode steps: 433, steps per second: 33, episode reward: 60.000, mean reward: 0.139 [0.000, 10.000], mean action: 2.993 [0.000, 3.000], mean observation: 72.912 [0.000, 228.000], loss: 0.776836, mean_absolute_error: 0.018400, acc: 0.990834, mean_q: 1.000000\n", "done, took 2826.234 seconds\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Visualize the training during 5000000 steps \n", "## Each episode is a game in wich Pacman has two lifes \n", "### When nb_steps is not long enough , a warning might appear \n", "### \"Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!\"\n", "\n", "dqn.fit(env, nb_steps=100000, visualize=True, verbose=2)\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAGlpJREFUeJzt3XuUFPWd9/H31+GOiAYnkQUiiZj1GFdQRqNrdFGji8bF3Y3ZeEm8rBsSj0azcdfVeFYTT042bs5q3OhRifqI9wsaJR6MjxKj0Sg6ICAXzRKjAZbIgIoCcpmZz/PHr+bpcZihu2d66Oni8zqnz1RX/brq29XVn67+VXVNSMLMzPJrl2oXYGZmvctBb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOVdy0EdEXUS8EhGPdTJtYETcHxHLImJORIytZJFmZtZ95ezRXwQs7WLaucC7ksYB1wJX97QwMzOrjJKCPiJGA18EbumiycnA9Gx4BnBsRETPyzMzs57qV2K7nwCXAMO6mD4KWA4gqTki1gEjgDXtG0XEVGAqwNChQyfut99+3anZzGynNXfu3DWS6st5TNGgj4iTgNWS5kbEpO4WByBpGjANoKGhQY2NjT2ZnZnZTici3ir3MaV03RwBTImIN4H7gGMi4q4ObVYCY7Ii+gHDgbXlFmNmZpVXNOglXSZptKSxwKnAryR9tUOzmcBZ2fApWRtfLc3MrA8otY9+GxFxFdAoaSZwK3BnRCwD3iF9IJiZWR9QVtBL+jXw62z4inbjNwFfrmRhZmZWGf5lrJlZzjnozcxyzkFvZpZzDnozs5xz0JuZ5ZyD3sws5xz0ZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHKuaNBHxKCIeCkiFkTE4oj4fidtzo6IpoiYn93+qXfKNTOzcpXyP2M3A8dIWh8R/YHnIuJxSS92aHe/pAsqX6KZmfVE0aCXJGB9drd/dlNvFmVmZpVTUh99RNRFxHxgNfCkpDmdNPtSRCyMiBkRMaaiVZqZWbeVFPSSWiRNAEYDh0bEAR2a/AIYK+lA4ElgemfziYipEdEYEY1NTU09qdvMzEpU1lk3kt4DngYmdxi/VtLm7O4twMQuHj9NUoOkhvr6+u7Ua2ZmZSrlrJv6iNg9Gx4MHAe81qHNyHZ3pwBLK1mkmZl1Xyln3YwEpkdEHemD4QFJj0XEVUCjpJnAhRExBWgG3gHO7q2CzcysPJFOqtnxGhoa1NjYWJVlm5nVqoiYK6mhnMf4l7FmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozs5xz0JuZ5ZyD3sws5xz0ZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOFQ36iBgUES9FxIKIWBwR3++kzcCIuD8ilkXEnIgY2xvFmplZ+UrZo98MHCNpPDABmBwRh3Vocy7wrqRxwLXA1ZUt08zMuqto0CtZn93tn93UodnJwPRseAZwbERExao0M7NuK6mPPiLqImI+sBp4UtKcDk1GAcsBJDUD64ARncxnakQ0RkRjU1NTzyo3M7OSlBT0klokTQBGA4dGxAHdWZikaZIaJDXU19d3ZxZmZlamss66kfQe8DQwucOklcAYgIjoBwwH1laiQDMz65lSzrqpj4jds+HBwHHAax2azQTOyoZPAX4lqWM/vpmZVUG/EtqMBKZHRB3pg+EBSY9FxFVAo6SZwK3AnRGxDHgHOLXXKjYzs7IUDXpJC4GDOhl/RbvhTcCXK1uamZlVgn8Za2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozs5wr5Z+Dj4mIpyNiSUQsjoiLOmkzKSLWRcT87HZFZ/MyM7Mdr5R/Dt4MXCxpXkQMA+ZGxJOSlnRo9xtJJ1W+RDMz64mie/SSVkmalw1/ACwFRvV2YWZmVhll9dFHxFjgIGBOJ5MPj4gFEfF4RHy2i8dPjYjGiGhsamoqu1gzMytfyUEfEbsCDwHflvR+h8nzgL0ljQd+CjzS2TwkTZPUIKmhvr6+uzWbmVkZSgr6iOhPCvm7JT3ccbqk9yWtz4ZnAf0jYs+KVmpmZt1Sylk3AdwKLJV0TRdt9sraERGHZvNdW8lCzcyse0o56+YI4GvAqxExPxv3XeCTAJJuAk4BzouIZuBD4FRJ6oV6zcysTEWDXtJzQBRpcz1wfaWKMjOzyvEvY83Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzpfxz8DER8XRELImIxRFxUSdtIiL+OyKWRcTCiDi4d8o1M7NylfLPwZuBiyXNi4hhwNyIeFLSknZtTgD2zW6fA27M/pqZWZUV3aOXtErSvGz4A2ApMKpDs5OBO5S8COweESMrXq2ZmZWtrD76iBgLHATM6TBpFLC83f0VbPthQERMjYjGiGhsamoqr1IzM+uWkoM+InYFHgK+Len97ixM0jRJDZIa6uvruzMLMzMrU0lBHxH9SSF/t6SHO2myEhjT7v7obJyZmVVZKWfdBHArsFTSNV00mwmcmZ19cxiwTtKqCtZpZmbdVMpZN0cAXwNejYj52bjvAp8EkHQTMAs4EVgGbATOqXypZmbWHUWDXtJzQBRpI+D8ShVlZmaV41/GmpnlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozs5xz0JuZ5ZyD3sws5xz0ZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOdK+efgt0XE6ohY1MX0SRGxLiLmZ7crKl+mmZl1Vyn/HPx24Hrgju20+Y2kkypSkZmZVVTRPXpJzwLv7IBazMysF1Sqj/7wiFgQEY9HxGe7ahQRUyOiMSIam5qaKrRoMzPbnkoE/Txgb0njgZ8Cj3TVUNI0SQ2SGurr6yuwaDMzK6bHQS/pfUnrs+FZQP+I2LPHlZmZWUX0OOgjYq+IiGz40Gyea3s6XzMzq4yiZ91ExL3AJGDPiFgBXAn0B5B0E3AKcF5ENAMfAqdKUq9VbGZmZSka9JJOKzL9etLpl2Zm1gf5l7FmZjnnoDczyzkHvZlZzjnozcxyzkFvZpZzDnozs5xz0JuZ5ZyD3sws5xz0ZmY556A3M8s5B72ZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLuaJBHxG3RcTqiFjUxfSIiP+OiGURsTAiDq58mWZm1l2l7NHfDkzezvQTgH2z21Tgxp6XZWZmlVI06CU9C7yznSYnA3coeRHYPSJGVqpAMzPrmUr00Y8Clre7vyIbt42ImBoRjRHR2NTUVIFFm5lZMTv0YKykaZIaJDXU19fvyEWbme20KhH0K4Ex7e6PzsaZmVkfUImgnwmcmZ19cxiwTtKqCszXzMwqoF+xBhFxLzAJ2DMiVgBXAv0BJN0EzAJOBJYBG4FzeqtYMzMrX9Ggl3RakekCzq9YRWZmVlH+ZayZWc456M3Mcs5Bb2aWcw56M7Occ9CbmeWcg97MLOcc9GZmOeegNzPLOQe9mVnOOejNzHLOQW9mlnMOejOznHPQm5nlnIPezCznHPRmZjnnoDczyzkHvZlZzjnozcxyrqSgj4jJEfF6RCyLiEs7mX52RDRFxPzs9k+VL9XMzLqjlH8OXgfcABwHrABejoiZkpZ0aHq/pAt6oUYzM+uBUvboDwWWSXpD0hbgPuDk3i3LzMwqpZSgHwUsb3d/RTauoy9FxMKImBERYypSnZmZ9VilDsb+Ahgr6UDgSWB6Z40iYmpENEZEY1NTU4UWbWZm21NK0K8E2u+hj87G/X+S1kranN29BZjY2YwkTZPUIKmhvr6+O/WamVmZSgn6l4F9I+JTETEAOBWY2b5BRIxsd3cKsLRyJZqZWU8UPetGUnNEXAA8AdQBt0laHBFXAY2SZgIXRsQUoBl4Bzi7F2s2M7MyhKSqLLhh/Hg1LlhQlWWbmdWqiJgrqaGcx1Tvl7ELF8JS9/CYmfW26l4C4cEHq7p4M7OdQfWCftdd4YEHqrZ4M7OdRfWCfo89YPHidDMzs15T3aCPcPeNmVkvK3p6Za/p3z+F/BFHVK0EM7OdQfWCHuBLX6rq4s3MdgbV/8cjt98Od91V7SrMesfmzcXbmPWy6gf9nXfCD34Apfxwa+lSOOEE8A+trFwSfPe78K1vQUvLjlnmggUwciTMmrVjltebNm+GmTNh9uxqV2LdUN2uG4AvfxnOOw9efRUOPLDrdqtWpZB/6y344x9h7lwYNKjztkuWpI1yl13gkEPg6KOhtRVuuSWd1rnrrjB0KAwfDhMmQL8yV8Pq1elAciUuzHb//TB/PnzvezBwYM/n16a1Na3TJUvgtNMqM8/16+Htt2GffSozv1KsXQtDhsDgwT2bz5VXwn/8RxpuaYEbbkivYW9ZtQr+5m9g48baPQ4lwVNPwT33wM9/DuvWped07LGwcmV6XfbYo9pVWikkVeU2ceJESZLeflvaZRfp8svVpS1bpIMPloYMkX70I2nwYOnZZztv+7vfSR/7mJQ2U+mf/zmNf//9wrj2t3/5l66X25mHH5b+7M+kP/6xvMd11NoqXXVVoY6/+itp7dqezfODD6Tp06UzzpA+/vE03wEDpI0bezZfSZo3T/r0p6W6OumnP+35/ErxwAPSsGHSAQdITU3dn89NN6V1ce650iWXSN/8ptTSUrk6O9qwQTrkkLS9zpuXxr3wgvT44723zN5w0UVpvQ0fLp19dqp/yxZp82Zpn32ko46SNm2qdpU7HdI1xsrK2+oHvSQde6y0774p/Lpy223SY4+l4bff7rzNmjVpPiNGSEuXSuvXSx9+mKa1tEgrVkivvSY1NkrPPCPdfLO0alWavnlz18uW0nwuuCCtss9/PgVPa6v0D/8gXXtt8cd39OMfp3mdeWYK51GjpGXLyptHR1dckeZZXy+dfrp0++3pOUvSokXpzVpu6Le2pvU0cGCq8cQTpbvv7lmdxWzeLH3rW+m5HHSQNGiQdOqp3Z/fwoXS178ubd2ank/bdlaJD8COWlqkU06RIqRHHknjWlulI46Qdt1VWrCg8svsLU88IX3ve52H+b33ptfnK1/p3Q9N20btBv3NN0sTJ267R9vamvbQO9PaKj34oLRuXWHc0qXSuHHSb35T6jpLmpulSZOkCy/sfKN+/XVpwoTCN4S2UP/gA+kLX0jjx42Trr5aeu65wofL9qxZkz4gOoZOa2taXqleekl68cU0/N576QOsszfeLbek8DniiLTsUr3wQnp+xx0nrV790Q/jGTPSB2elrV0rffKThXX99NNdf7hvz5tvdr3z8MYb0t57S/fc05NKt7VpU/pQ+vGPPzp+5cr0QTlmjPS//1vZZVbSrFnSddeV1vY//zNtG//6r71bk31Ed4K+elevbGhQY2NjW/9R6i9991246aZ0jv2AAeng6+23w8svwwEHfHQGr70Gn/0snHMO/Oxnhf7WrVvT48uxZQtccglcdx3svz985jPQ3Jz6zSdOhAsvhLvvhunT4aSTPvpYCX75S7j8cnjllTTuoYfg7/8+HUd49NGPtl21KvUPd9Uf/5OfwGWXwZlnFmrr1y89R4Bbb03LGTwYVqyA++5LfaZPPVX8eT74IHztazB2LNx4Ixx6aDpW0V5rKyxalI4btNXw+ONw/PFQV1dot2kT7Ltv6rf9xjcKtZ5+Onzuc/D738MPf7htDd/8ZjpusngxXHNNOo5SV5dugwalfvQBA9J8hw//6GO3boUf/Qi+851C3W3bDqTHrluX+uCbm9O6uvJKuPjibev48EOYPBl++1s466x03GbECPj3f0/T778/bWNbtqRbczN8/OPptQG47TZ4881C7XV16cDr2WcXTizoeAxg3jw48kjYb7+0nR9ySKr3nnvSYzZsSLf169Opx4cfno5JXXVVYR4DBqRaTz8dDjoobQMPP5yez6ZNhb9f/3p6fzQ2wn/9V3rsbrul2/DhaTvYe+/0OsyalZ7j8uUwbVqa74svFn8fSeng9g03wB13pHnOng3PP5/WV79+aTsdPDi97v36wQsvpONGbet1y5a07r7znTTPGTPS9tf+WNoeexTed7/9bTo+0NKSnuuGDelYwT/+Y5p+/fVp22tvzJjC/G+8Mb0H279uY8cWjmP98Idp+qZNaXvcbbd07PArXynUt3FjYRtraYFx4+ALX0jTf/CDtKwhQ7a/7nqgO1ev7Bt79G1ef13b9KGfc07Xe2WXXpra/O3fpm6V5ubyPx7be+SR9M3iL/4idRm0fTPYuFFavrz4499+W3r00cI3k7a96Pa3Pfcs9Nt2pqlJOv74dJxhr73Snu0BBxSmf+MbadqQIdLQoanPuf23mmKefVbaffe03p54Io17+eV0rOKLXyxMg7QHvz1vvZW+IdTVpXqGD5fuvDNNmztXGj1629svfpGmP/NM2sMdOTIdTxgxIi17e/3/zzyTjuccfXTqUjjySOnv/q4wfdSodExi8ODUTTJhQqHrqjPvvSf99V+n5e+2W9rDbzNlSloH/fql+Q0bJo0fX5h+zDHbbqsHH7z99SWlbWzQoLQ3LKVvrB3nM2SI9LOfpekLFxbW3ahRqVtu8GDpvvvS9F//+qOPHTgwvQ5txwNmz5Y+85nUpfmJT6THgvT882n6nXcWHhuR3m8bNhR/Hm2am6Xvf1969910/5JLtn0+UPiWed55204bPLgwvzPO2Hb6XnsVpp900rbT99mnMP3EE9Nr2XYbNixtJ20mTtz28ZMmFaaPH5/eXyNHSnvskbbt9ttYff22jz/jjML0wYOlP/2p9PXXDdTsHn3hU+ejn/Strds/s2Xz5rRXunAhfPWraa+iN8+kyIt33oE5c+Av/zLt3d18M5x/ftozOfJIOOqo9Hfvvfve+rzrrsI3jYkTYcqUwl54JW3dmvb2dilyBnJra9qra2lJ228pZwe1nVs/cGDaK1yzJq3noUPTnmCxZba3ZQt88EFa7qBBpT22bS+6ri4Nb92avin069fz17ulJc1jl13Sc2v7ltH2Pl6zJt0fMCDd+vdPdbStNymt040b0zebDRtSjfvvn6b/4Q9pXNtjhg5Ne/7lnpXV/nWDrs/gkwrrB+CNN9K4urq0vurq0vKHDSvMt5zXrxu6s0fft4K+O5YtS199/+3fKnt64s5mB2ygFbN8eXpz+dQ+2wl1J+irfx59T40bB1dcUe0qal+thDykPlczK1kNvbvNzKw7Sgr6iJgcEa9HxLKIuLST6QMj4v5s+pyIGFvpQs3MrHuKBn1E1AE3ACcA+wOnRcT+HZqdC7wraRxwLXB1pQs1M7PuKWWP/lBgmaQ3JG0B7gNO7tDmZGB6NjwDODair52uYWa2cyrlYOwoYHm7+yuAz3XVRlJzRKwDRgBr2jeKiKnA1Ozu5ohY1J2i+4g96fD8aozrr65arr+Wa4far//Py33ADj3rRtI0YBpARDSWe4pQX+L6q8v1V08t1w75qL/cx5TSdbMSaH8+2+hsXKdtIqIfMBxYW24xZmZWeaUE/cvAvhHxqYgYAJwKzOzQZiZwVjZ8CvArVeuXWGZm9hFFu26yPvcLgCeAOuA2SYsj4irSNRdmArcCd0bEMuAd0odBMdN6UHdf4Pqry/VXTy3XDjth/VW7BIKZme0Y/mWsmVnOOejNzHKuKkFf7JIKfU1E3BYRq9uf9x8RH4uIJyPif7K/ffJSihExJiKejoglEbE4Ii7KxtdK/YMi4qWIWJDV//1s/Keyy20syy6/MaDatW5PRNRFxCsR8Vh2v2bqj4g3I+LViJjfdmpfrWw/ABGxe0TMiIjXImJpRBxeK/VHxJ9n673t9n5EfLvc+nd40Jd4SYW+5nZgcodxlwKzJe0LzM7u90XNwMWS9gcOA87P1net1L8ZOEbSeGACMDkiDiNdZuPa7LIb75Iuw9GXXQQsbXe/1uo/WtKEduef18r2A3Ad8EtJ+wHjSa9DTdQv6fVsvU8AJgIbgZ9Tbv3l/qeSnt6Aw4En2t2/DLhsR9fRjbrHAova3X8dGJkNjwRer3aNJT6PR4HjarF+YAgwj/TL7DVAv862qb52I/32ZDZwDPAYEDVW/5vAnh3G1cT2Q/pNzx/ITjyptfo71Hw88Hx36q9G101nl1QYVYU6euoTklZlw38CPlHNYkqRXVX0IGAONVR/1u0xH1gNPAn8HnhPUnPWpK9vQz8BLgFas/sjqK36BfzfiJibXcYEamf7+RTQBPyfrOvslogYSu3U396pwL3ZcFn1+2BsBSh9rPbp81QjYlfgIeDbkt5vP62v1y+pRemr62jSRfb2q3JJJYuIk4DVkuZWu5Ye+Lykg0ndredHxFHtJ/bx7acfcDBwo6SDgA106Obo4/UDkB3DmQI82HFaKfVXI+hLuaRCLXg7IkYCZH9XV7meLkVEf1LI3y3p4Wx0zdTfRtJ7wNOkro7ds8ttQN/eho4ApkTEm6Qrvx5D6jOulfqRtDL7u5rUP3wotbP9rABWSJqT3Z9BCv5aqb/NCcA8SW9n98uqvxpBX8olFWpB+8s+nEXq++5zsstF3woslXRNu0m1Un99ROyeDQ8mHV9YSgr8U7JmfbZ+SZdJGi1pLGlb/5WkM6iR+iNiaEQMaxsm9RMvoka2H0l/ApZHRNsVH48FllAj9bdzGoVuGyi3/iodVDgR+B2pr/Xyah/kKKHee4FVwFbSHsK5pH7W2cD/AE8BH6t2nV3U/nnS17qFwPzsdmIN1X8g8EpW/yLgimz8p4GXgGWkr7MDq11rCc9lEvBYLdWf1bkguy1ue7/WyvaT1ToBaMy2oUeAPWqs/qGki0QObzeurPp9CQQzs5zzwVgzs5xz0JuZ5ZyD3sws5xz0ZmY556A3M8s5B72ZWc456M3Mcu7/Ab3GANJbUPMaAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## Plot mean_reward \n", "episodes = list(range(0, 66))\n", "mean_reward = [0.401,0.139,0.137,0.237,0.109,0.212,0.211,0.207,0.142,0.143,0.208,\n", " 0.159,0.139,0.139,0.216,0.140,0.142,0.139,0.190,0.214,0.136,0.120,\n", " 0.141,0.208,0.139,0.141,0.141,0.141,0.140,0.207,0.116,0.139,0.140,\n", " 0.237,0.141,0.138,0.140,0.139,0.138,0.242,0.138,0.137,0.141,0.141,\n", " 0.137,0.140,0.142,0.137,0.139,0.208,0.238,0.138,0.139,0.136,0.138,\n", " 0.139,0.135,0.136,0.140,0.140,0.139,0.138,0.141,0.136,0.137,0.139]\n", "\n", "plt.plot(episodes, mean_reward, 'r--')\n", "plt.axis([0, 70, 0, 4])\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJztnXeYFFX2/t87MAGGkZxHEXElmR0DYkBFwYSi7ndxWRO6IJiXXVfMyoq6rIoRhAXElVURFSMqKEYQRQSJgwiSRYJIlEnn98fb91fV1dU9HYqe6dnzeZ55uivf7pl569S57z3XiAgURVGUzCerqhugKIqiBIMKuqIoSg1BBV1RFKWGoIKuKIpSQ1BBVxRFqSGooCuKotQQKhV0Y8w4Y8zPxpiFrnXDjTFLjTHfGWNeN8Y02LfNVBRFUSojngj9OQA9PeumAThURA4HsAzAkIDbpSiKoiRIpYIuIp8C2OpZ94GIlIUWvwRQuA/apiiKoiRA7QDO0Q/Ay9E2GmP6A+gPAPn5+cd06NAhgEsqiqJUY/buBRaGstTHHJPy6b755pvNItK0sv1MPEP/jTEHAnhbRA71rL8DQBGAiySOExUVFcmcOXMqvZ6iKEpGU1wM2OA1gPIqxphvRKSosv2SjtCNMVcCOA/AGfGIuaIoyv8MjRsDJ5wAtGuX1ssmJejGmJ4AbgVwqojsDrZJiqIoGU6TJsCsWWm/bDy2xRcBzALQ3hiz1hhzNYCnABQAmGaMmWeMGbWP26koipI5lJQAS5YAP/yQ1stWGqGLyKU+q8fug7YoiqLUDBYtAo4+mu/TmJHWkaKKoihBU1FRJZdVQVcURQma8vIquawKuqIoStC4I3RNuSiKomQwbkFPY/oliJGiiqIoips2bYDf/x5o2TKtl1VBVxRFCZrWrYFJk9J+WRV0RVGUoNm1C5g/H8jLA448EshKT3ZbBV1RFCVoZs0CzjyT77dvBwoK0nJZ7RRVFEUJGrdtMY0WRhV0RVGUoKkil4sKuqIoStC4o3IVdEVRlAymilIu2imqKIoSNEceCdx8M9CsWdo6RAEVdEVRlOBp0wZ47LG0X1YFXVEUJWi2bAG++gqoVw847jggNzctl1VBVxRFCZpp04BLQ1NJrFgBtG2blstqp6iiKErQqG1RURSlhqADixRFUWoIGqEriqLUENSHriiKUkPo1g14+GGgcWOW0k0TKuiKoihBc9BBwK23pv2yKuiKoihBs3YtMHMmUL8+cOKJWj5XURQlY5kyBfjDH4CePYHi4rRdVgVdURQlaNTloiiKUkNQH7qiKEoNobpG6MaYccaYn40xC13rGhljphljvg+9Nty3zVQURckgqnGE/hyAnp51twH4UER+B+DD0LKiKIoCABdfDDz3HDtHO3dO22UrtS2KyKfGmAM9qy8A0C30fgKAjwH8PcB2KYqiZC7t2vEnzSTrQ28uIhtC738C0Dyg9iiKomQ+xcX0oTdsSB96s2ZpuWzKnaIiIgAk2nZjTH9jzBxjzJxNmzalejlFUZTqz0svAf36Ab17A3Pnpu2yyQr6RmNMSwAIvf4cbUcRGS0iRSJS1LRp0yQvpyiKkkFUV5dLFN4EcEXo/RUA3gimOYqiKDUAt7OlOgm6MeZFALMAtDfGrDXGXA3gIQBnGmO+B9A9tKwoiqIA4SJencrnisilUTadEXBbFEVRagZVFKFrtUVFUZSgGTCANdHr1q1ePnRFURQlQQ46iD9pRgVdURQlaL7+GvjuOyAvD+jaFTjwwLRcVgVdURQlaJ5/HnjqKb5/4YW0CbpWW1QURQmaDPOhK4qiKNGoItuiCrqiKErQVNeBRYqiKEqCVNeBRYqiKEqC3H03MGgQ3x9wQNouq4KuKIoSNAcckFYht6igK4qiBM306cCaNUBJCeuhH3ZYWi6rgq4oihI048YBH3wAbNkCjBiRNkHXTlFFUZSgKS8HsrP5Xl0uiqIoGUxFhQq6oihKjcAdoevAIkVRlAymiiJ07RRVFEUJmpEj6XApKQEaN07bZVXQFUVRgqZlyyq5rAq6oihK0EyaxOj8p5+ALl1YEz0NqKAriqIEzb//DezcCXz5JXDnnWkTdO0UVRRFCZqKCiAriz/qclEURclgyssp5rVqqQ9dURQlo6mooJhrhK4oipLh2JRLmiN07RRVFEUJmjfeAETodKlXL22XVUFXFEUJmkaNquSyKuiKoihBM2oUI/N164DOnYHzzkvLZVXQFUVRgmbMGKBVK2D2bOCSS9Im6Cl1ihpjbjHGLDLGLDTGvGiMyQuqYYqiKBmLtS1mZWWGbdEY0xrAjQCKRORQALUA9AmqYYqiKBlLhtoWawOoY4ypDaAugPWpN0lRFKUaMG8ecNttdKskSqYNLBKRdQD+BWA1gA0AfhWRD7z7GWP6G2PmGGPmbNq0KfmWKoqipJNzzwUefpjzgiaKO0LPBB+6MaYhgAsAtAWwDcArxpg/icgL7v1EZDSA0QBQVFSUxK1OURSlCigsBNavBzZtApo0SezYuXMBYxip106f9ySVlEt3ACtFZJOIlAJ4DcCJwTRLURSlinnoIb5u3Jj4sXXqAHl5QH4+kJsbbLtikMqtYzWAE4wxdQHsAXAGgDmBtEpRFKWqadMGOPtsinOi3H8/cPDBwOrVtC9efnnw7fMhlRz6bACTAcwFsCB0rtEBtUtRFKVqee89RtnHH5/4sWPHAtOmARMmAO+8E3zbopBSckdE7gFwT0BtURRFqT4sXgx8+mlyx7p96BlkW1QURamZlJXR4TJgQOLHVlG1RRV0RVEUP0pL+bpqVeLHZujAIkVRlJpJWRlfk3G5AE6EnszApCTR4lyKoih+tG7N159+SvxYe4wI/ehpQgVdURTFj2HDGGEPG8a0Sa1aiZ8jjWIOqKAriqJEp0sXoF8/YO9eoG7d+I+77jrg1FOBFSuA7Gxg8OB910YXKuiKoih+DB4MbN0KjB+f+LHPPccBSV9+yZGiKuiKoihVyOLFFPSKCv4kUpPFulzUtqgoilINKC0F1qwBcnKA//43sWN1YJGiKEo1oqwMaNmSgpyo06WKInRNuSiKovhRVgY0bMiKiYkKekEBc+j5+WpbVBRFqXI6d6Yor1yZuKD/8su+aVMlqKAriqL48eyzfP366+QGF1UBKuiKoiixuPzyxPLgZWVA377ApZcCy5cDmzc7k2XsY1TQFUVR/DjvPKB9e+CRRxI7rqwMmDQJOOoo+tB//DFtgq4uF0VRgqNrVwqhl9JS4OqrOXIyU1i2DNiwgQK9YUP89kO7n9oWFUXJaGbO9J+hZ9YsYNw44Mor096kpCkt5WCi8eM5jdz69fEdZ9MzqdgWv/qKU9jNmpXQYSroiqLsew45hK+XXhrf/qedxunbguKeezgtXCKUlbEOS4sWXI63Y9QKuI3QkxH0HTuAH35wSvjGiebQFUXZ9+Tk8LWkJL79P/6YP1dcEcz177+fr61bAz17xndMWRkj9Pr1ubx9e+Q+69YxNXPaac46EaCwkF70xo2Bpk0Tb2/Xrsy9N2+e0GEq6Iqi7HvWrePrnDnOunfeAVavBgYODN/XTghx223BXb+ggFFvnTrxH3PqqcChhzo3IzuDkZtBg4A33wyfxKJBA5YMSIW8PKBNm4QP05SLoijBYgXQza+/8tVOGgGw83TQoMh9bZohPz+4NmVnA9dfT5GOl5deAm64gccC/k8XnTr5f95UWbyYddg3bUroMBV0RVGCQYTR9uTJ/tsA4MwzKz+PjYQTLYgVi5tuAs46C5gxI/FyuIWFtB127Bi5rVYtCr07Qt+6FTjnHGDqVGDEiOQ6gufPB+64g5NUJ4CmXBRFCQZjgCee8O/Is9GtO21x8sn+EWhuLl/r1QuubXffzdc//5mpnquuqvyYdu2AAQOAW28F/v53/30eeICv5eVOed3duynmvXsDCxYA06cn3l77PdmngzjRCF1RlGAQYTTaoUPkNitQDz7orLvqKqZBvNSqBRQVJdeZGI3Nm4HffqNA+uXC/Vi5knn3sjI6TmzayA93OiYI26I9X4LpHBV0RVGCYeNGYOJEYNWqyG224mCWS3KaNvW3Au7axc7TBQuCaVd5Oa/1z39SIONx2lRU8AaVnc1BRQcf7J9KatmSr+75RoMYWKQRuqIoVcrevdG39egBNGrECoaWKVOAf/wjct/Nm/maqlPEYsUxJyf+CN2mjWrXjt0petxxwOGHO2kiIJgIXQVdUZQqJZagAxRUt5hGG+gTb0okXqwQZ2dHtiEadh+3oPsdt307b0DufoPatZl2atCAg5LatUu8zQMG8LwNGyZ0WEqCboxpYIyZbIxZaoxZYozpksr5FEXJYGIJ+owZTK+sXRu5ze0QARzhfOmlYNrljnYHDwa+/77yY4wBfv97CnOsQVG//caSANZnD9A/vmQJcP75wF13sUBXouTmclBSVmISnWqE/jiA90SkA4AjACxJ8XyKomQqVtCPPjpymy3Kde65kdu8ka87og4Cdwdjo0bAgQdWfkzduqyY2KtX7Ajd+uiDfqqYNg0YMiTyZlcJSQu6MaY+gFMAjAUAESkRkW3Jnk9RlAynWTPgb39jx6gXK6oXXRR9m8WK45AhwbSrXj0O0jn+eE5WMWxY5ekhNzk5wFNP0cfuxZ7H/Rl++IGWzE8+AUaOjM977+XTT4GHH054+rpUIvS2ADYBGG+M+dYY829jTMTQLmNMf2PMHGPMnE0JjnpSFCWDOOAA4L77mPeNlkbZutVZ17Ila7V4/eadOgEnnUTbYBAUFPDmcNRRrAZ5xx30isdi3TrmwP/zH6Y9rrsOOOaYyP2uuYav7gh9xw7g88/5WX/8Efjss8TbXFKS1AjUVAS9NoCjAYwUkaMA7AIQUXxBREaLSJGIFDUN0leqKEr1oqSERbBatAD27AnfZgXPXbdlyBDmqb3UrQsce2y4cyTVdq1cSRGPt0hYSQl959ZyuGCBf/7fvb8lKJdLEimnVAR9LYC1IjI7tDwZFHhFUf4XmTrVmZnHm1OuWzdy/6Ii4K23wqN2gAW7xowBdu4Mpl1LlwIHHcT2xcqHu7GuFbt/167+MxfZm467Rk0Q5XPTLegi8hOANcaY9qFVZwBYnOz5FEXJcNx5aa9gDhzIvLLbtfHBB5yIeZun623RIkfMg5jtx90pGm+E7rYtAtH964WFwB//yAkwLEENLEpzhA4ANwCYaIz5DsCRAIaleD5FUTIVt6D7CabXA37vvXz1CqV7Od766bFw2xZjDRJy4x5YBET3r+/YwTy5+2nCpowaNqTgFxUl7FbBM88kNbAqpeJcIjIPQFEq51AUpYbgFkmv+I0ZA3z4IYUOCBc4r7ja5e++S6x+eTTcI0V79+aAnQYNYh/ToAHQrx/Qti2Xs7P9bwKnn06//KefssIiABx2GKeQA4AuXYD+/RNvc1ZW2jtFFUVRHGyEfsstziw/liWhISqDB/PVnYaIFqEH7UPPzubEEY0bh9de8eOAAziS1Xrqo6Vcbr01/BpBMWYMMHRowoepoCuKEgzHHkvb4gMPREbAJSUc1POHPzjLFm+O2QrnZZfFP49nLA45hGV9Dz6Yo0SHDPEvIBaLRx6JnFkJcMr/usX+66+BI48EvvkGeO452iV/+y2x602dyoFNCaKCrihKMBx7LPDXv3IovFfASkvpZlm6lMtW0B991EnDWC66iDeFOXOAX35JvV0HHMCZh1q2pIPmoYcqF/SPP2b+/JNPuHzhhXS6eOnRg6/uG9T27ZygYvduCv68eYl3jFZRp6iiKAr55RdgwgRGwgsXhm+zgnf22XzNywOefDJ8cmVLvXrO7ECJjOiM1a4FC3iuWPODuikrowjb1Mz8+czpR8N9Pq/Lxb0uXlTQFUWpUoYPd2qbeHPKtm64XZ+XR3H/178Ywbr56ivgscf4PtFUhR9Tp7LE7Y8/Ju9yufZaPn24sf7yli3Z+eldbwcWudfFiwq6oihVSiwf+rBhzEHb9Xv30hkycWKkPe+zz5zh8kFE6O5O1ngjdK8P3c+2aJdvuAFo395ZH0SEnpWVlMNH5xRVFCUYYgk6EO4UWbWKtkAgum2xceOEi1P54p3gwu+aXrwjRbOzI8sZ2HMsXMjO2xYtuNyoEe2M9esD++/PtFJlrhov06Yltn8IFXRFUYIhlqD/5S90muTnR26PFvlu3Ji4EPrhti127uzMLRqLgw4CbrzRmdc0O5udnW5ycmgtvOsu4IgjHAtjly703AOM3Hv3Tv0zxImmXBRFCYa9e5kmeOopVkx0s3AhxXnkSC67I2S/8rlZWcGIuT0fQFHOymL9lcomjjjiCODxx50h/X5zkebmOiIedD30225jp3GCqKArihIMffsCTz/NUrNt2oRvKy0FTjyR3nK7bKntSRSUlLAT8ZxzWO8lVc48Exg3ju6ZX39lzvvzz2MfU1rKFIsd0Xr77cCIEZHtXL7c2d/y7rucdm7ZMuCVV+j6Wb8+sTZPmVJ5G31QQVcUJRh69GChqvnzgS1bwreVltKT/cUXFGsb7X7wAY9xc++9PMfUqXSmpEqnTsBVVzHK/u03PkHEsiACwPPPsyaLLZl7/PFAt27h+6xd60x67Y7ed+zgDE1lZXz/ww+JR/DqclEUpUpZsYKCfeSRwNtvh28rLeWgopNOoqj+7nccRWkF0U2dOk452iBsi6tWAbNm8X2i5XPt08OCBcD06eH7RKtdo7ZFRVEynj//2XGueAWzc2fHsVJaCjRvzjIAgwYBr70Wvu9rrzk+9CBsi6NHs3QvkHz53Mcf5+xKfvv8/vfhTxlB2BarYMYiRVEUh717nenkvII+bhxdLgDFassWTgf3xhtAcXH4vu+8w8JYQDARujvaTXaCC79OUbv8pz+xXosliAi9YcPIAmdxoLZFRVGCIZagA+FiOmsWcMklXPYTyjp1OPy/sjK38eCOdrOzGXVXVp/cm3Lxq7Zol+fOZQrJlisoLAQuuIAWzcJCoFcv/xmbYmGrUyaICrqiKMGwdy/QrBnfe8XvrLM4E5HdVpkPPTsbWBzQBGjuCD0rK74OyhNO4GTSdoo5v5GibdsynTNwIOcsnTCB608/nT8AU0unnBLM54gDTbkoihIMe/cyVTBhglOEy7J4MSPYV18FmjSJ7UNPMn8cldLSxM930knAP/4RnqrxtrN5c/YbHHRQ8D703r2BF19M+DAVdEVRgmHYMHrQL788cmBRaSnTEhddxFSEFceGDSPTKjaivuQSnjNVBg4E/v1vZ/m664D//Cf2MTt2OLXOAeDqqyM98b/8wtrnpaXhYj9hAsV+40ZaL5s3j6w+GYuKCvrQly2L/5gQmnJRFCUYLr6YrzNnsgKhnb4NoOBt2wa8/z7TGTaiXbKEgudmyhS6Qjp2DGYKuqOOCu+0fPllvtpBTn48+CArQVqhbteOP24++4y5coDVHC27dgE//0xXT2kp3ycyo1EKMzZphK4oSjB8+SU936efDowaFb6ttBT49lugZ09Gnj16AJMn+3d61qrFFElubjAul7lzeZOx+DlWvJSWho9gLS4G/vtf/9x/bq7/DEzJ2hbtuZJIO2mErihKMHTvDgwY4O8IOe00RtvFxdx20EH86dsXOPRQTgtneeQRimReXjCCPnQoBz3Nn89lvw5OL2Vl4YI+dSrnSj3nHOcmZIX3ySfDI/RUbYspROgq6IqiBMPevRRiP0F/6y1gxgzWNikp4dyeK1Ywqvfyyiv0YOfmBlcP3S2Ofh2cXsrKwo/xG5BkP+Ppp4enY1KN0EV4vkaN4j8mhAq6oiipU1FBEczNjR4BuyeX+O9/WbOlffvotsXDDw+ufK47fdG0aeW+cG/KxW9Akn0/bx6wbp1jT+zQgU8eOTksYdC3L2u7x0vjxk7RrwRRQVcUJXVsJG0jdHcku2cPcMgh9KID3FZSQrHOzY0UdCvAzz4bTNu8Ebo7nx6N3r2ZCrL4RejduvHG9PTTdMV8/TXXn3MOfwCe44UXUmp+IminqKIoqWOFLjcXGDOGJWrd29au5ew9778PHHec4w3366BMsjBVzLYler4ePYDrr3eW/SL0du2ASy9leigRF0tlrF4NnHEGU1QJooKuKErq1KlD18p559HJ4rYJWrFr0oRRetOmjsh26gQceGD4uWrVYofo7bcD556bettGjAAeeshZvvNO4O67Yx+zYUP4XKc9ejACP+AAZ92PPwIffRQ5+vShh5jSKS1l9cm6dRMT5+3bed7Nm+M/JoSmXBRFSZ2cHMeH/vnnjNSPPZbLVuxKSij6RUVOhG6Hy7uxJQKuvDKxATnRsO2w2Jrssbj5ZtZMtzVVGjeOzIO/+CJvOhdfHC7oJSVMM9n8/549Tm2YeEjBtphyhG6MqWWM+dYY83bleyuKUiPZuRN47z1OlnzjjcD99zvbrEBt28ZSsx9/zJTMK6/EPmdeXjAul6lTw9008bhcvJ2ia9eybsvGjc46ew73yFfAuVkYk5zLpYoHFt0EILnSYIqi1AxWrWL9lk8/jbQt5uVxGH/79lwuKaETpFs3Th79pz+Fn2vgQHYkBuVDv+kmp3QvEN/AIq9tcelSeuy//95ZV1rKKHzIEGf0KUDxNoY/NkpPk6CnlHIxxhQCOBfAAwD+ksq5FEXJYNwuF69gtmjBaPznn7lcWkqniZ2qbeXK8HNNnEgxD0rQvZ2iyQws8usUteft0CH82IoKR8hthJ7IwKK8PE5SnUTp4FRz6CMA3AqgINoOxpj+APoDwAHuDgVFUWoOXttiZfXQH3uMFRg7d/b3oefkMKLv1o0DbexsR8ngdc20bMmbSWXHuAU92sCinByWNFi2jDMwAXTxDBjA982a8X0i2ldURG97EiSdcjHGnAfgZxH5JtZ+IjJaRIpEpKhp06bJXk5RlOpMLEH/5htWVfz4Yy7b6oTZ2f75bLvtqquYl09FzO353B2MTz8NTJsW+5gbbwT++ldn2d4Q3G3985+BSZPYOXrVVc76Cy7gRNQAhXzUKEbcaSCVHHpXAL2MMT8CeAnA6caY9DnoFUWpPrgF/aGHHEEDmDbZto3Wxi++4MhJK7Je8S8vZ3oiSB96Mr72889nqV+Le5SrpVMn2hm9N6XycidnLuJ8pnj56CM6c9z5+jhJWtBFZIiIFIrIgQD6APhIRP5UyWGKotREjjmG0XSnTvSgFxU529xVCU88EWjVyklXdOwYvm9ZGf3qBQUchdmmTXhd8mSYPp02RMsTTzgWy2gUFzO/bznkEFoYzzzTWTdnDs+dkxMu2n/9q1OHZelSpm4qc/S42bSJ505i0gz1oSuKkjpNmjBaBWgR3LyZg4wAR5hycoDnn6eI27TK3/8efp7cXEfAx43jqMk9e1Jrm/uGATDyrWygzx//yFz72yE3dl5eZOfnY48Bs2cD/fpxubSU7S8vdzpDk+kUrep66CLysYicF8S5FEXJQFau5KChXbuYbrnpJmebTUdkZwPXXsu887PPhqdl/MjL42tlTpft2+mD90OEsxW5Oxmjddq68doWd+wAHn2Ug40s7rQR4JzTz+WSQT50RVH+1/noIw4a2rIl0rbYujVHfTZr5ohpx450uDzySHgt8W3bgAsvZM0XO0FzZYOL6teP7iIpLWXn5TvvOOuSsS3u3AkMHhxe2Mvm5i+7jJNo2NmV3BG61kNXFCXjsKLr19F55JHA+PHO9tJSThbdqBHn5Vy82Nl3507gjTdYw6VVK66Lx4v+yy/+6/3EMZmRon6dorYfoEUL/lhS9aE3awZ07ZrU9Hsq6IqipI672mKslIYV0zvuoNB37Oh0KLqLXNla4hddxA7SZHGfz9KqFZ8Kysuj11v3plz8bIu2H6C4mE8of/oT23rWWUBhIfepX5+dpJ07x9/mCy/kTxKooCuKkjqxfOjPPce0x/LlzjabrnBHvu65ObOzKfivvlr5tVu2BPr08d/mPp9l4ED+xOLRR1kV0uI3UvSxx7j85ZfAoEHsFC4ooIPGumgaNgSGD6/8MwSECrqiKKnjFvSbbw6vz1JS4uSk33+fonfCCZEdiu7JLhKpNLh+PSN8vxGlyZwPiIyQ/UaK2gkwbEVGe61duxj977cf27VjBzt4bZ9AZTzzDOcpXbAgPO0TB9opqihK6vTrx7K5tWrRO37MMc42t6h26MBUio3Qf/c7x94I8Pi2bSmGS5eyZO2UKbGvPWkSz+03bVuzZizH6/adT57M4fnbtkU/5xdfAD/8EN6utWtZTMzy9tsc/eoV+4ED+XQB0L7ZoAEwdmzsz+Bm40Z+9iSm31NBVxQldQoL2ZEH0CI4erTTEehOe7z0Ejs9reXvggs4gXS9etynY0cO6DnrLAra1q3RLYkAO0MHDWJE7C5ta7GTaLgnXN68mZNVxPK3n3tueIVGgDci206A/QAjRkSmY/x86InaFmvXTqrkgQq6oiipM3Mma5oArD8+YIAj5G6nySOP0IM+axZw222xz2l96LFsi1u30ioJ+Av65s0UXXf07pc+8eLtFAWAhx/mZ7P49QMA4S6XZG2LSZY+UEFXFCV1JkwAbrmF770R69FHc0ILW1q3tJSVFFu1Yqdny5aczg1g5cLu3TmAJ56BRe7o3U/Q16xhu9wzH/lZEL14bYsA8M9/Au++G75PTg5wyimstmgLcKUaoXuLiSWAdooqipI6e/c6nX5ewezenT+AY1t89FGmaEpLOcuRFe1Nm4APP6RQxxOhuwXd1lt349cp6mdB9OIdWORuu8XaFvPz2RdgSTVC79DBKaOQIBqhK4qSOm5B90boJSWOKGdnA7t3c9TltGmR4urOt+flAZdfHllDxY0V9C5dImu2eM9nad6cN5NorpOKCv+Kj147pk2NbNjA6N12ovbpA1x/Pd/n5gL33ceiZPEycGD4DEgJoBG6oiip4xehWzG9/XZg5Eja+XJyKOh2P6/4uyPq7Gz/SaTdVFTQLz5mjP/gHb+Rot260ZETizfeYIVFN96SBtOns5N03ToWGevYEWjXjtPtWbKzgbvvjn2tAFFBVxQlddyCftFFwEknOcPh3Z1848bRLtihAwXSK/6J1jE5+2ymWsrK6Hhp2DB8uz28/pYxAAAeCUlEQVRvIjnprCygV6/I9d4IvVMnvlr7o922aRNvNM2b0xu/bh299/Xrx3f9QYOY8//00/jbbJue8BGKoiheRo2iJRGgqLZvH55OsYLavLkjutnZtAL26ePYCgsKOCw/P5/LLVqEV26MxuWX01vu5dRTWYLX7Yv/+muK8ezZ/ucqLaXH3HbUuo9zPzGMGsVzeNNGV1zBCTIACvr++wOPP175Z7Bs2kR3ThKooCuKkjr77w8cfDDfL1/OTk/3pNBW9N58k0PmAYr8YYfR7ti+PdedfTYwfz4HJwH0Ysdyubz8MtC7N28Sfi6XvDy2zZ0vLynh6M7t2/3PuX07Bfmtt8LXFxQ4HbUA8+RvvhnZCex2uVgveaK2xSRdLiroiqKkzsSJjgAuWsROz7VruewWqFdfBV54gXbCaPVX3OTmxhb0BQsoqoWFHGLvHSy0cCHwj3+ER7yVuVzKyvjqdbk89RT7AgAKdHm5My+q+3xul4sx/EnUtpgRPvREPpSiKJnD8OGcSAKIjFjPP99xfdjp2goLGfHOn8+ORTsz0KRJwPHHO3npvLzKbYsFBUzlAJHWxXnzgLvuCi+vW5kP3a73CvqkSfxx72OrQq5fz1mOgPAIHaC4p2lgUXo7RZctS+vlFEVJE7Fsi17Xx4YNwNChnBAjK4vuFxuFr1sHfPWVs39eXuUDi+rVY80WgGkXm64B/DtF/SonurERup9t0d5c3J23tWpxcJTFHaED/IyJCPqppyY1nyiQbkHfu9e/IpqiKJmNn6BbMd26lf/zDRs6wnr33ez8tLlzr23RnqNv39juECvohx3G1Ip7ogm/8wE8X8+e4eVx3URLueTkOL53d4ReUsJrn3kmcPLJfBoRcY4bPpyjZePlzjvj39dDegW9vJyPRPbxSFGUmkGskaJ9+7LeyldfRU4aUZlt8W9/i33dRo3oF2/ThsWyvPiNFC0sDK/J4qVVK45W7dgxfL17pOh++3EgUaNGjL6HDgXq1qWgu59IAODGG2N/hgBJvw992TIVdEWpabgF/eij2SHapAmX3Z2i997LlML558ceWOQW/hkzGOF36xZ53Weecd6vWcPj3FG630jRyqhbFzj99Mj12dlO9F6rFnDQQXxv+wbttVasYHRv5zldtow3AO/TQzROPJElhCdOjL/NIdLvcikuTvslFUXZx8yfD9x/P9/n5rKj0Aq8u5MvP5+CCXDdfvsB11zjjMosLGSU67b7DR7MMrvz58duw6GHAsOGha+7/nqmfPbbz1m3dSutjOPG+Z9nyxZaKdevD1//yit01QDsZH3oITp6atVie+3N6NJLWW3ScvzxwIMPxm67m23bKp/zNArpFfSWLcMN/oqi1Axat+ZkFAAtgvfd54if24b38cec0QhghF6/Poftn3IK111zTfgIyawsDsMvKADOOSdyMug+fRyxbN480ouek8Pcvbvfzk5W8euv/p/lhx/oWJk3L3y927ny88/AkCGsCmmvY0XY63LJykq8HnpG2BZbtQKOOiqtl1QUZR9TUcHo3NZH2baNqRUbUbtTLl9/TaFfu9YppiUS2wWy//4U9Y0bWTPFzWefMcUBUNC9tsX33ousu16ZDz2abfGFFzjhs/tYey53WQCvyyUZ22JGDCwqLwfmzk3rJRVF2ceUlAD33ENxBSIF84YbgCuvDN9Wt67TyZiVxfQFwMj+5JMjr3HMMYzsx4wBvvnGWb9jhzOLULNmkRH6p59y1KqbZG2LX3wBPP98+LFWeDds4AQYwP9QhP7TT5wc1n5hiqJkPjYqtvZCr2BecYXj/LACeMstrFni3Xft2vC5PN3cdx8wfrzzlC/i2BYB/5SL32QRNvJOdKSoOwr3Ruj16jnvy8sjI/REBP2Pf3RSUAmSXpdLXh6/kJUrwwvCK4qSucyaxVdbHMtrW1y5koLXtKkjehMmcARn06YUTrdtMVp0mp/vRPoAh/mLOILety8DRjd+5zMG+MMfnGqJXqKlXNx5cu+ApaFD6Xrp25fpJ7d3fvhwpo3iZfjw+Pf1kLSgG2P2B/A8gOYABMBoEYldUswWtlm2TAVdUWoKM2cyhWKnYPOmXLp1ow1w/PhwcbVi6I58K0s3bNvGgTeXXAIceSRvItYe2LWrM1G1JVpdFFsZ0o8uXeiZ9/Oh23Z26cKnASvczz3HdX37snywG1sSIF5SGHyZSsqlDMBgEekE4AQA1xljotzyQlgbk1oXFaXmsHw5hdUK5377UXht/Ra3qF52GfDkk3zv7lB0R76xOgSzs4Gnn+ZTQYMGLF976aXctnMn19s6MEBy+ej69YFjj3Uif4u1XIrwnM2ahY+OtWI/b1542ui776KnkfzIyfEfJBUHSQu6iGwQkbmh9zsALAHQOuZBtWvT2qSCrig1h3feCS81awxF0Z16saLqdnzY7Tfe6AwaOuKI2Pnj/HwK+bp1kdu++46Dcr780lk3ZgywalXkvu3bO/ZJLz/8wOPcNwaATwbbtvHzLV5M0d2wwfksVtAvvpguH8vFF8c/a5GI/3ymcRJIp6gx5kAARwGIUjHexbhxwHXXBXFZRVGqC95o9tZbneH17qh7wQJnwgor8kOHAhdeyPd33smJI2LRujUFfe5cpl1sMS87At3dMWqMvzju2kWHjB+zZwP9+/vXV7csWcJBTLYsr/spIxWXi+2QrSrbojGmHoBXAdwsIhEV440x/Y0xc4wxczZt2sSpnQ4/PNXLKopSHRg5kjlir2CNGOEMEHJH6GvW8PWLL5ybwO7dzjyj8WAFfdMmet2tCNqKi24v+qhRLJzlxR1Re4nmcnnvPXam7twZ6XLJyXGePFLxoSc6BZ+HlATdGJMNivlEEXnNbx8RGS0iRSJS1LRpU9Z8mDgxchSWoiiZx1tvUVTdAgaE55RHjOCsQnY9QIGzHX9HHMERogA7O//v/2Jf84ADKLq28qG9MdSrB9SpEx5Zv/suJ9Xw4o6ovUTzoS9fznroe/ZECu/MmbyW/WzeCL26C7oxxgAYC2CJiDxa2f7/n7Iy4NprnY4RRVEyk4oKdkJ6nSVAeAQ8YAAdIIAjVNde67/vhg2stRKL0aOZbrEpk4ICvhoT6UWP1inqtiB6iWVbBHic17bodqWkknKpXRv4y1+SLpGSig+9K4DLACwwxthw+3YReTfmUfn5LGz/yisUdVuoR1GUzGLpUnYSnnhi5DYbAVdU8Gm8sJApESuuixZF7gtQTGPVPwcc8fRG6ABTQDb1AkR3zVx8sVMN0kusgUW2jd5I+oknWBvmrruAZ59lmRPL8OGRfQzRyM8HHnkkvn19SMXl8rmIGBE5XESODP3EFnPLFVfw7jplSrKXVxSlqvn2W74ee2zkNpty2bWL0aYdMu+erNm7LxDffJoLFrD64pYtwGmnhYtlz57hk0lEi9DvvdexVXrp25edno0aha93u3b692fe33bETp8OvBbKOvfq5dSpAYAePfyfYvwoL6c2JjmavmomiT75ZBaknzChSi6vKEoA5OXRf37wwZHbVq/mHKPeSLaoCBg4MDw6dqdc4vGNl5ZyYujDDwc++oh5c8uyZdxmqV2bUa8f7lmF3DRoAHToEBmhFxSwQ1aEfQZ16jhPC+7P8PHH4dNtzpnj3PwqY8UK+vhjDXyKQdUIelYWcPnlidc4UBSl+nDxxbT4+UXdtpPUb8Ygbxrk6qs54AhgNHvSSbGv2zo03MXPi/788+yAtZ2QH33kTEDtpkeP6H732bOBxx6LjJJ79WKtmUMOAd5/n/ZLdweqTRtdcEH4xBvXXx9Z8TEaVelySYkbbuBdqFat2LN6K4qSedx/P/PZXnvfxo3A2LHhk0f068cAD2BlxGgDfiy2JswNN0RWZmzenGK+ZUvscxgTvVP0gw/YMRmLL79k3txG6EGVz/Wb1DoBqk7Qmzblo8Xu3bxTptARoChKmikv55Rqj0cp3/Taa/RteyPOPXv46p4taOtWVmKNl6wsp9Nx06bwbbZD1DpdBg9mqQAv8fjQvVbMefM4ycaiRTw2K8vZZ7/9HINHKi6XjI3QLdnZwIEHsnD8yJFV3RpFUeJh1SqKZjT3ho1YmzblxBA2kvZWYgSYcunRg+/btXMmkYiFLaHrvb7tpLSDi6ZMcapBetsXy4deu3Zkgaxt2zj6ddOmyEkonnrKce4EMbAo4yJ0S3Y2f+Hnnw8MGsRRZ6NHV3WrFEWJxdKlfO3QwX+79XkXFNA1YidUtpGne+Yht7hu3Ro9cnbz+ut8so8m6DZCjzb7jztF4qW01L9cgLuKZCw3TioReuvWrPvSrl18+3tIbz30aGRncwSWnU8wN5e2IBHagbp3T7qcpKIo+4DKBN0K5vbtTFUceihtgFYE3YWv3OmPeGyLlp07w/3eANC2LV0mhx4a+3znngscdpj/ecvK/I9x+9BFnHLgAEe/v/suX996i+2wPPhg/BF6mzacyCNJqj5Ct+TlMVJfvZp1kwH2Tp91FkeZTZnCwj3HHRe7aI6bGTPY2dK9u7Munru/oiixKS6m9dBODO2loIBCvXgxcOqpdI4A0aPleCa4cPP66xwtamuwW/LyeD3brmgR+mWXRS9Re8899KF7cY8UHTEivGbMwoUcLAnQC9++vbPt+OOdkbKVsWcPO4yT1KnqI+h+nH02vaxr19KK9OCDzLU1a8ZHmLfeYg/52WdHdo5MmEAhf/dd7l9Swjvv0Ufzj0xRlOQ59lg+RUfjrbeAadMic8J16zKfftppzr42PSMSv6D/9htf/SaPePNNXhugsDdoELlPaakz0tRL/fqONdJNfj6F2m90u33KqKjgzcZdIvzLL/nUEA8ffMBrL1gQ3/5eRCRtP8ccc4wkxe7dIlOmiKxd66x75hkRQGS//UTy8kSOPFJk61Zu+9e/uO2MM0R27HCO+flnkbp1RS691P86FRUiEyaIvPqqs/8dd4iUlCTXbkX5X2f6dP4vfvKJs+7440XOOstZnjZNZORIkfJykWuuEXnjjcrP+8knPO8HH0RuO+ookXPPjX38jTeK1K/vv23KFJHHHot9/DPPiNx2m7M8dCjbs3s3X4cOdbadc45IvNo3aRKPX7AgbDWAORKHxlbvCN1Spw7N+u67ZteuvBNv3Mh0zOLFzF8BjMovvJApG3enSdOmrMX+8svhI7kApnp69mRZgu++47o33gAeeICRvvcJwEYIlVFRwfNFG5W2r9ABW8q+wubGY/Hkk7QM+tnwZs9mJGrp3p3FurKyOLFEr16Vt8Hmzm1tdTfNmlWelo3VKfr660ypxGLGjPDSJfYJxOpCZS6XaDn1jLctJsvhh9MZk5fHNMz8+c7EGRMnsmSmu9PCMngwO10ffNBZN28ec/MzZ9J+ZGcXueYa5vW/+go44wzgl18ozFddRQ/uihXOOdas8a+/MHQo83wPPBDcZ6+M999nb7+fXas6IQJ89plTI1vJDObMYVri/fej7zNrFgMuv4Ey+fn8P7Rs3sycNWPT+NpgBd1P/G3FxfJyasSkSZH7xKq2GG3GoC1b6KyZMiUyN9+wISeCtueM5XIpLub57QQgbjLethgUHTo4TpgWLcK/UDfNmzP39+67/ENatIidKNnZjByuuy782L59mQ8sLmbP+O7dHPr7668sCyrC6L1NG6BPn/A/yLlzWVy/cWNWYXvxxdif4aefeDP58cfw9RUVwN/+xlxgv37Mx+3axW07dnB48+rVzv4LF/KP74Yb4u9dLyuLfGrZV5SXc4KD007jP8i55zp/yC+/zH4Ov2HdXrZv56ARO5GCG5Ho/7CJtjWI81iCaldVYh0usax1VjCPO44C6N7X6wF//HG6UjZv5v+e30AgL3Xr8m9/2LDIbVbQS0r4lO4OvNxtKCvzv4FEsy1WVDAAWbs20j3z5z/zf9DWjXFriDdC372b13VPU2fxjqxNkJoj6Inwt78xqm/ShOJ81VWMzjtFmeO6e3eK8Y4dFJEhQ1irYfp01o5o2pSRwquvcvZvy9ix3LZoEYXYXYEN4DmvvJLn/OUX/lEPHUqhc0et114L/OtfnG7rlVe4ffJkbps2jU8Phx/OGwjA6GfsWOCbbyiQXtasYXuXLOG+q1axYFJREXvYS0ootA8/zJtLvDcFy08/RaakrGDv2UP72kknMU02cCAdAHv3ci7HPn1YyGjGjMhzTp/uLO/cyVF711/vVLmz51+/nrPMt2zJm52X776Lb2RiWRl/J126BOOOKi9nB37nztE75JJlwQJ2EHqDgWSIla4rK+PfeEEBBwRGIyeHQcXUqUyXujsmt20Lnw8hO5t/Y7YESLzzadar5x+4NW/Ov2H39HB+7bOfx0s8tsVonbf2f8WdcvFG6EcdBdx+O5903CUQADpihg/378iNh3gS7UH9JN0pui8oL0/8mNLS8OO7dhVp0YKdpuXlIqedJpKfL/LOO84+K1dGXnfBApEnn3Q6dSsquO3JJ0VefpnrWrUSWbSI6999V+TBB7nfzp0ir78usmoVt23ZwuU2bURyckSGD+d+5eUiRx8tcsAB7KgpLRX57Tce07u3SJ067LAqKBBp0oRtufNObl+1ip/NPgTn5op07Oh8LsuePSKvvSZyww0i/fs767t1E6lVS6RTJ3YGtW7NjiHLvfey83n79vDv9oQT2FE1b174dfbuZXsKCvh5H36Yny0rS2TiRK4TEXnxRX4PTZuy8/v440WKi7ltwQKRL77gZwf4e7PXmTqV7X/2WZG5c9nhVVbGbZdfzv0fftjnDyIKO3eKjB/P77NfP37WPXvCzzd4cPzn++03kRdeELn7bp5v5Mjwzv6VK0VatuR5Dz5YZMOG+M8t4vz9ifB337gxO/V2747c9447eJ3nn499zn/8g/v16hW5DeDvyDJsGNcVF/N17NjE2u/l559Fli8X2biR53viich9PvmE36ef4aFXL5osvOzaxfM9+CD3OftsZ9u0aezoXbNG5LPP+GpZvFhkzhznuqtXi3z/Pc81bBi//1tu4f/It9/6fiTE2Sn6vyvoQbB0KZ00P/3E5TVrRBo2pKi7xcrNQw+JZGfzq7/gAkdk3XzxBc9x//3xt2X5ckeAbQ/5J5+I3HMP/zFXraIbqEsX549SRGTyZC5feWX4P7YI/+ieflrkr3/lzWrJEq4fO1bkpJMosIBIvXo8rz3+ww8pZr16ifTsKXLVVbzRxGLsWJ7rxReddVOmiFx2mciZZ3Lbyy/z+zrmGBFjKOZuZs0SadtW5LDD+E/k5qSTnLbedhudEEuXinz9Ndfn5TnfHyAye7Zz7IUX8gbovTlbFi8W+fJLR2QHDOA5atUSadTIEVobRPTvz21R/nll+XL+Dfz6a/j5jKHYAiIHHcTz7dxJ8WnQgDeRoqJwMYnFzJki7drxpn/jjRS3igqRQYN4jf3352cfMIC/+x07eMO8+urKz22Dj2ifb/NmZ9m60ubOje9mES8//MDzjRyZ2HG7d/v//5aVMVDIzhZZvz582wsv8FrLlkU/b2kpv7/TTuPyqaeKnH8+NcH9NzhmTETAqYJeVSxcyH8UrzhatmwROeUUClUsO+S6dU7kGS8rV4q8/bb/tu3bRW6+WeSII0ROPplRr2XFisSeWMaP5x9lv360jbmfXJJlypRw0d+2jX/whYWM2t02sF27IgXbUlrq/1lmzxZ56SVGbSLhvx97o1i6lP+YS5eGH7t6NW+w7mjzgQdEhgyhmNqbwF13cdu6dbyplZbyOtOni4wb5xy7datIs2a8Udl23Hcfo197wwWcG/qKFbTy7d3L/WfO5GextG0r8vnn4Z9r507u4/6cboEdNUqkdm3eGHr35lPh5MnO9o8+YsR46KFs66xZXL95s3/kngrjx/PzjhoVeVNPhaVL+RkWLvTfPm4cb07vvScyYwaf0Pyiee85H388cv3LL7PtZ53FG6D7b+izz/j/fv753Mfaordu5dPEfvvRSr11q8hf/iJy0UUq6IqyT/n3v0V+/3tnuVUr/vsUFfEf/PXXowuHH6+9xvSLpVkznq9zZ0Zsb74ZeWPxo6LC/8b/2GNOlH3uubw5tmjh3Hx79eIT4i+/cDnajTAd7N1LMV2/nqmHaE8uQfPAA3xicz+ZHXdccgFKcbHI4YczAm/Xjk9Ylmuv5bmbNRP5+9+ddJ5l8WInJReFeAXdcN/0UFRUJHPmzEnb9RRln+Kd3b06na+8nKOlp0+n60kEuOQSVjLMz+dkzC1aaI2k3bvpXMnOpkmiU6f4O2XjZeNGOsi6dEn63MaYb0SkqNL9VNAVRVGqN/EK+v+mbVFRFKUGooKuKIpSQ1BBVxRFqSGooCuKotQQVNAVRVFqCCroiqIoNQQVdEVRlBqCCrqiKEoNISVBN8b0NMYUG2OWG2NuC6pRiqIoSuIkLejGmFoAngZwNoBOAC41xkQpKK4oiqLsa1KJ0I8DsFxEVohICYCXAFwQTLMURVGUREmlCk1rAO7JINcCON67kzGmP4D+ocW9xpiFKVyzKmgCYHNVNyIBMq29gLY5HWRae4HMa/O+bG+beHYKuKxYJCIyGsBoADDGzImnwEx1ItPanGntBbTN6SDT2gtkXpurQ3tTSbmsA7C/a7kwtE5RFEWpAlIR9K8B/M4Y09YYkwOgD4A3g2mWoiiKkihJp1xEpMwYcz2A9wHUAjBORBZVctjoZK9XhWRamzOtvYC2OR1kWnuBzGtzlbc3rRNcKIqiKPsOHSmqKIpSQ1BBVxRFqSGkRdAzoUSAMWZ/Y8wMY8xiY8wiY8xNofWNjDHTjDHfh14bVnVb3RhjahljvjXGvB1abmuMmR36rl8OdVhXG4wxDYwxk40xS40xS4wxXTLgO74l9Dex0BjzojEmr7p9z8aYccaYn93jPKJ9r4Y8EWr7d8aYo6tJe4eH/i6+M8a8boxp4No2JNTeYmNMj3S3N1qbXdsGG2PEGNMktFwl3/E+F/QMKhFQBmCwiHQCcAKA60LtvA3AhyLyOwAfhparEzcBWOJafhjAYyJyMIBfAFxdJa2KzuMA3hORDgCOANtebb9jY0xrADcCKBKRQ0EDQB9Uv+/5OQA9Peuifa9nA/hd6Kc/gJFpaqOb5xDZ3mkADhWRwwEsAzAEAEL/h30AdA4d80xIV9LNc4hsM4wx+wM4C8Bq1+qq+Y5FZJ/+AOgC4H3X8hAAQ/b1dQNo9xsAzgRQDKBlaF1LAMVV3TZXGwvBf9TTAbwNwIAj1Wr7ffdV/QOgPoCVCHXGu9ZX5+/YjohuBLrC3gbQozp+zwAOBLCwsu8VwLMALvXbryrb69nWG8DE0PswzQCddV2qw3ccWjcZDE5+BNCkKr/jdKRc/EoEtE7DdZPGGHMggKMAzAbQXEQ2hDb9BKB5FTXLjxEAbgVQEVpuDGCbiJSFlqvbd90WwCYA40Npon8bY/JRjb9jEVkH4F9g9LUBwK8AvkH1/p4t0b7XTPif7Adgauh9tW2vMeYCAOtEZL5nU5W0WTtFPRhj6gF4FcDNIrLdvU14q60WPk9jzHkAfhaRb6q6LQlQG8DRAEaKyFEAdsGTXqlO3zEAhPLOF4A3o1YA8uHz2F3dqW7fayyMMXeAKdCJVd2WWBhj6gK4HcDdVd0WSzoEPWNKBBhjskExnygir4VWbzTGtAxtbwng56pqn4euAHoZY34EK12eDuanGxhj7ICx6vZdrwWwVkRmh5YngwJfXb9jAOgOYKWIbBKRUgCvgd99df6eLdG+12r7P2mMuRLAeQD6hm5CQPVtbzvwRj8/9H9YCGCuMaYFqqjN6RD0jCgRYIwxAMYCWCIij7o2vQngitD7K8DcepUjIkNEpFBEDgS/049EpC+AGQAuCe1WbdoLACLyE4A1xpj2oVVnAFiMavodh1gN4ARjTN3Q34htc7X9nl1E+17fBHB5yIlxAoBfXamZKsMY0xNMIfYSkd2uTW8C6GOMyTXGtAU7Gr+qija6EZEFItJMRA4M/R+uBXB06O+8ar7jNHUknAP2Wv8A4I6q6MyIo40ngY+k3wGYF/o5B8xLfwjgewDTATSq6rb6tL0bgLdD7w8C/9iXA3gFQG5Vt8/T1iMBzAl9z1MANKzu3zGA+wAsBbAQwH8A5Fa37xnAi2COvxQUlqujfa9g5/nTof/HBaCDpzq0dzmYd7b/f6Nc+98Ram8xgLOry3fs2f4jnE7RKvmOdei/oihKDUE7RRVFUWoIKuiKoig1BBV0RVGUGoIKuqIoSg1BBV1RFKWGoIKuKIpSQ1BBVxRFqSH8P0Y5ERqKqIx1AAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "150" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Plot loss and accuracy \n", "episodes = list(range(0, 150))\n", "\n", "loss = [2.750,1.9326,1.599,1.4933,1.3641,1.3544,1.2858,1.2228,1.2695,1.165,1.2079,\n", " 1.0799,1.1265,1.242,1.2760,1.219,0.966,0.9580,1.0700,1.1492,1.1680,1.0672,\n", " 0.9353,1.0579,0.8656,1.0098,0.837,0.8859,1.0678,0.9264,0.8049,0.9255,0.8928,\n", " 0.9739,0.8459,0.8857,0.9969,0.9506,0.9345,0.960219,0.831869,1.0556, 0.8367,\n", " 0.8366,0.8056,1.006,0.8444,0.983,0.9342,0.8919,0.8765,0.8173,0.8173,0.8597,\n", " 0.877,0.8935,0.8714,0.8827,0.9247,0.9506,1.0173,0.8194,0.9933,0.8126,0.8047,\n", " 0.9586,0.8423,0.969,0.9432,0.8538,0.9458,0.8530,0.8371,0.9035,0.7926,0.8675,\n", " 0.8354,0.8754, 0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,\n", " 0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,0.7918,0.9274,\n", " 0.7951, 0.7180,0.6970,0.7397, 0.7380,0.8324,0.7980,0.8889, 0.8174, 0.8040,0.796,\n", " 0.9588, 1.750, 2.289, 6.869,0.908,0.927,6.778,0.895,6.555,0.788,0.701,6.674,2.567,\n", " 2.43,2.234,1.070,3.657,2.296,0.889,0.8122,6.668,6.516,0.774, 0.730,0.739,0.831,7.99,\n", " 6.665,0.847,0.801,6.596,0.756,0.754,13.629,0.728,0.771,1.055,0.734,0.879,0.810,6.769]\n", "\n", "plt.plot(episodes, loss, 'r--')\n", "plt.axis([0, 150, 0, 12])\n", "plt.show()\n", "len(loss)\n" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3XmYFOW1x/HvERAEFxQnSkDFGNSgN2qcENyNKxoDiZobokZNVNRHr5p4r49bcpXodUsUjVuIG+4aRUFEjXHfdVBAFpcRBUQQBAURZZtz/zjd6ZmmZ7pmpmd6pvh9nqef6ap6u+pUV8+pt956q8rcHRERSZe1yh2AiIiUnpK7iEgKKbmLiKSQkruISAopuYuIpJCSu4hICiVO7mbWwczeMrOxBaZ1NrP7zKzazF4zsz6lDFJERBqnMTX304Fp9Uw7Dvjc3b8LXAVc1tzARESk6RIldzPrDfwEuKmeIoOBkZn3DwD7mpk1PzwREWmKjgnLDQfOAtarZ3ovYBaAu680s0VAD+Cz2oXMbCgwFKBbt247b7vttk2JWURkjTV+/PjP3L2iWLmiyd3MDgHmuft4M9u7OUG5+whgBEBlZaVXVVU1Z3YiImscM5uRpFySZpndgEFm9hFwL7CPmd2ZV2Y2sFlmwR2BDYAFiaMVEZGSKprc3f0cd+/t7n2AIcDT7n5UXrExwDGZ94dnyuiOZCIiZZK0zX01ZjYMqHL3McDNwB1mVg0sJHYCIiJSJo1K7u7+LPBs5v0fa43/BvhFKQMTEZGm0xWqIiIppOQuIpJCSu4iIimk5C4ikkJK7iIiKaTkLiKSQkruIiIppOQuIpJCSu4iIimk5C4ikkJK7iIiKaTkLiKSQkruIiIppOQuIpJCSu4iIimk5C4ikkJK7iIiKaTkLiKSQkWTu5l1MbPXzWyimU0xswsLlDnWzOab2YTM6/iWCVdERJJI8gzVZcA+7r7EzDoBL5rZY+7+al65+9z91NKHKCIijVU0ubu7A0syg50yL2/JoEREpHkStbmbWQczmwDMA55099cKFDvMzCaZ2QNmtllJoxQRkUZJlNzdfZW77wj0Bvqb2fZ5RR4B+rj794EngZGF5mNmQ82sysyq5s+f35y4RUSkAY3qLePuXwDPAAPzxi9w92WZwZuAnev5/Ah3r3T3yoqKiqbEKyIiCSTpLVNhZt0z79cB9gfeySvTs9bgIGBaKYMUEZHGSdJbpicw0sw6EDuD+919rJkNA6rcfQxwmpkNAlYCC4FjWypgEREpzqIzTOurrKz0qqqqsixbRKS9MrPx7l5ZrJyuUBURSSEldxGRFFJyFxFJISV3EZEUUnIXEUkhJXcRkRRSchcRSSEldxGRFFJyFxFJISV3EZEUUnIXEUkhJXcRkRRSchcRSSEldxGRFFJyFxFJISV3EZEUUnIXEUkhJXcRkRRSchcRSaGiyd3MupjZ62Y20cymmNmFBcp0NrP7zKzazF4zsz4tEayIiCSTpOa+DNjH3XcAdgQGmtmAvDLHAZ+7+3eBq4DLShumiIg0RtHk7mFJZrBT5uV5xQYDIzPvHwD2NTMrWZQiItIoidrczayDmU0A5gFPuvtreUV6AbMA3H0lsAjoUWA+Q82sysyq5s+f37zIRUSkXomSu7uvcvcdgd5AfzPbvikLc/cR7l7p7pUVFRVNmYWIiCTQqN4y7v4F8AwwMG/SbGAzADPrCGwALChFgCIi0nhJestUmFn3zPt1gP2Bd/KKjQGOybw/HHja3fPb5UVEpJV0TFCmJzDSzDoQO4P73X2smQ0Dqtx9DHAzcIeZVQMLgSEtFrGIiBRVNLm7+yRgpwLj/1jr/TfAL0obmoiINJWuUBURSSEldxGRFFJyFxFJISV3EZEUUnIXEUkhJXcRkRRSchcRSaHyJffq6rItWkQk7cqX3Bctgq+/LtviRUTSrLzNMp9/XtbFi4ikVXmT+8KFZV28iEhaqeYuIpJC5UvunTtDTU3ZFi8ikmblS+7bbw977VW2xYuIpJn6uYuIpFD5kvt778H115dt8SIiaVa+5L50KUyZUrbFi4ikWfmSe8eO6i0jItJCkjwgezMze8bMpprZFDM7vUCZvc1skZlNyLz+WGhedXTooH7uIiItJMkDslcCZ7r7m2a2HjDezJ5096l55V5w90OSL1k1dxGRllK05u7uc9z9zcz7L4FpQK9mL7lrV+jbt9mzERGR1TWqzd3M+gA7Aa8VmLyLmU00s8fMbLt6Pj/UzKrMrGr+2mvDnXc2OmARESkucXI3s3WBB4Ez3H1x3uQ3gS3cfQfgr8DDhebh7iPcvdLdKysqKpoas4iIFJEouZtZJyKx3+Xuo/Knu/tid1+SeT8O6GRmGzc404ULYZtt4ta/IiJSUkl6yxhwMzDN3a+sp8ymmXKYWf/MfBc0OGP3uJBJPWZEREouSW+Z3YBfA2+b2YTMuHOBzQHc/UbgcOBkM1sJfA0McXdvcK4dOsTfzz+HLbdsSuwiIlKPosnd3V8ErEiZa4FrG7fkzKJVcxcRKbnyXaFau+YuIiIlVb7k3qkTHHAAbLhh2UIQEUmrJG3uLbTkjvDEE2VbvIhImul+7iIiKVTe5D5gAJy+2n3IRESkmcqb3JcsgY8/LmsIIiJpVN7kvuGG6gopItICypvcN9pIXSFFRFqAau4iIilUvq6QAHvuCd26lTUEEZE0Km9y/+1v4yUiIiVV/n7u7vESEZGSKW9yHzUKOneGd98taxgiImlT3uTetSusWKEeMyIiJVb+rpCgHjMiIiVW/q6QoJq7iEiJqeYuIpJC5U3u3bvDiSfCdtuVNQwRkbRJ8oDszczsGTObamZTzGy12zhauMbMqs1skpn9INHSO3SAG2+EffdtQugiIlKfJDX3lcCZ7t4PGACcYmb98socBPTNvIYCNySOYOVK+OqrxMVFRKS4osnd3ee4+5uZ918C04BeecUGA7d7eBXobmY9E0UwYAD88peNi1pERBrUqDZ3M+sD7AS8ljepFzCr1vDHrL4DwMyGmlmVmVXNnz8/RnbvrhOqIiIllji5m9m6wIPAGe6+uCkLc/cR7l7p7pUVFRUxUrf9FREpuUTJ3cw6EYn9LncfVaDIbGCzWsO9M+OK021/RURKLklvGQNuBqa5+5X1FBsDHJ3pNTMAWOTucxJFkK2519QkjVlERIpIcsvf3YBfA2+b2YTMuHOBzQHc/UZgHHAwUA0sBX6TOIIDDoD114fly6FLl0aELiIi9TEv0+12KysrvaqqqizLFhFpr8xsvLtXFitX/vu5A8ydCzNmlDsKEZHUaBvJvbISzj+/3FGIiKRG20ju220HU6aUOwoRkdRoO8l92jRYtarckYiIpELbSe7ffAMffljuSEREUqHtJHeAqVPLG4eISEq0jeT+H/8BI0fGiVUREWm2JBcxtbxu3eDoo8sdhYhIarSNmjvAe+/B6NHljkJEJBXaTnIfMSLu664eMyIizdZ2kvt228GyZTB9erkjERFp99pWcgddzCQiUgJtJ7n3yzyWVcldRKTZ2k5yX3dd2GILJXcRkRJoG10hs0aPhp7JnqstIiL1a1vJfYcdyh2BiEgqtJ1mGYhH7V1yCVx7bbkjERFp19pWcl9rLXjpJfjDH+K5qiIi0iRJHpB9i5nNM7PJ9Uzf28wWmdmEzOuPzYro4ovhiy/giiuaNRsRkTVZkpr7bcDAImVecPcdM69hzYpohx3giCNg+HCYM6dZsxIRWVMVTe7u/jywsBViyRk2DFasgIsuatXFioikRana3Hcxs4lm9piZbVdfITMbamZVZlY1f/78+ue21VZw5ZVw1FElCk9EZM1i7l68kFkfYKy7b19g2vpAjbsvMbODgavdvW+xeVZWVnpVVVXjIxYRWYOZ2Xh3L/rwi2bX3N19sbsvybwfB3Qys42bO18AZsyA//5vWLq0JLMTEVlTNDu5m9mmZmaZ9/0z81zQ3PkCkdz/8he4666SzE5EZE2RpCvkPcArwDZm9rGZHWdmJ5nZSZkihwOTzWwicA0wxJO09SSxxx6w445w9dVQolmKiKwJit5+wN1/VWT6tUDLXFJqBqefDr/5DTz9NOy7b4ssRkQkbdrWFaqFDBkCFRVwzTXljkREpN1o+8m9Sxc46STYdddompkxA84+WydZRUQa0LbuClmf886Le82Ywfvvw2WXRZK/++4YJyIidbT9mjtA586w6abxfr/94P/+D+69N5K8iIispn0k93xnnx1t8eeeC48+Wu5oRETanPaZ3M3g5pthp53g/PPjPvAiIvJv7TO5A3TtGo/le/zxuA98bVVVze8XP2cO3HlnbnjZsubNT0SkFbXf5A7QuzdsskncQfLCC+Ok60UXwQ9/WDcxN9aIEbDNNnDiifDZZ3DddVBZCQ3d7ExEpA1p38k9a+LEeMjH1lvHU5yOPBIGDoShQ6N23xiTJkVS/+EPY74bbwz9+sEHH8RFVJ991jLrICJSQulI7pWVcOutsGRJJPk77oANN4QXXoiTrytXRrmvv44a/uzZ9c/r4othvfXggQfgu9+NcT/+MYwZA+++G238IiJtXDqSO0RtffHi6EFjBh07RqJ+5x0YOTLKdO4MF1wQtftC5s2Dhx+GU0+NnUNt++0H++8Pzz/foqshIlIK6UnuAJ061R3++c/hRz+C44+HyZPjxOuJJ8bFT4Xaz7/1rdgZnHlm4fnvskvU+ht7dezy5TBuHLz8cuM+JyLSROlK7vnMchc63Xhj/D399Oj58re/1S2bbbrZckvo0aPw/H73O1i4MHrqJDFxIhx7bJz0/clP4MAD4dNPG70aIiKNle7kDrDXXjBrVu7GY9/7Xpxsve66qFFnnXpq1PQb6kLZtSt06JB82WPGwIMPwuDBcMstsbzHHmvaeoiINEL6kztEl8nafeHPOCNuSDZ9egyPGAE33QQ9exa/V82ll8aOIIk//CGaf267LW5b/OGHUZMXEWlha0Zyz3fAAVBdHb1hTjst2uH33z8SdzEffRR96Bu6KvaOO+DFF+N9ly658d/+dvzN7lRERFrImpnczaJ55d574a9/jbb0sWNh/fWLf3bAAFi0KE68FjJ5MpxwAlxxReHp48bFTuXpp5sev4hIEWtmcodoW3/uubhHzZVXJm9L32WX+PvKK4XnecIJsMEG8Pe/F/78PvvA5pvDWWet3r4/cyY88UTydajt5Zej+emjj5r2eRFJlSTPUL3FzOaZ2eR6ppuZXWNm1WY2ycx+UPowW4BZJODf/rZxn9t66+gDXyi5P/44vPpq9K//1rcKf75LFzjnHBg/Hl56qe60HXaIk71ffNG4mCCOFGbPjp45bdGjj6qnkEgrSlJzvw0Y2MD0g4C+mddQ4Ibmh9WGmcGhh8JGG9Ud7x4XSPXpA8cc0/A8jjoKuneHa/MePXvccfH3n/9sXEw1NbFTOfxwWGedxn22NSxcCD/9aZy4TruZM+Grr8odhUjx5O7uzwMLGygyGLjdw6tAdzPrWaoA26SbboLLL687rqYmkvall65+MVW+bt3iiGHcOPjyy9z4yy6LnUb+PeqfegoGDYobpBVSVQVz58a9b/7nf+IIoi154YXY+d19dzSFJfHKK3BDO6snvPEGbLFFXCUtUm7uXvQF9AEm1zNtLLB7reGngMp6yg4FqoCqzTff3Nu9FSua/tl589znz4/3M2e6/+//ui9Y4H7EEe4VFe6rVuXKmrmD+4cfFp7Xuee6d+gQ8+zVy/2gg5oeVzEff+z+2WeN+8wZZ0T84P773yf7TLb87NmNj7EcZs1y79kzYp47d/XpQ4a4X3RR68clqQNUeYK83aonVN19hLtXuntlRUVFay66tJYtixraMcfEvWZGjYrafH0160IqKuKOkwDDh8etihcvjhOyF1yQm1d1daS5iy+OJp9Ctt8++u5XVMQRweOPR/NAUp98Eid6G7o9wrRpcXTyi19A//4wZUrhcl99VffiMIBnn4357703PPNM8Xiyt3fo0gXWXbfutKuuavydPlvaV1/FkdWSJfD226uf95g5M3pmFXuwTHaXVt+0efNKF7OkX5I9AA3X3P8G/KrW8LtAz2Lz3HnnnVt299bS9twzV7sE9z593Jcta9w85s5133bb+PyQIYXLnHOO+1prRY35q6/cV65seJ4ffRQ1/T/+MVkMNTXuBx4YMbz8cuEy8+e7r7uu+1lnub/yivsmm7ivt577JZe4v/12zMPd/fzz3ddZx/1Xv8p9duHCiGfYsHiZxRFKQ8aNi3gef7zu+M8/j/mfcIL70qVxtPP118nWsyX913/FNnr00TiaO/FE91tvzU0fPTr3O3n11frnc8QR7rvvnvs+a/vXv9zXXtv9+OPd33+/5KtQ1KpV7g88EL/B1jBxYvy2ZDUkrLmXIrn/BHgMMGAA8HqSebb75L5ypfsHH7g/8YT7dde5jx/f+HmsWpX7p6/9+blz3R96KJbRs6f7IYe4v/SSe9eu7s89V3ceU6eu3kwycKB7796Fm42mTau7g7jhhlj+tdfWH+fZZ0dSnjIlhmfNct9jj1zs//pXjL/oIvftt49EV7sJ6cMP3efMcX/hhSg/alTD38vpp7t36eL+6afuf/tb7LDc3a+5Jj5fVeU+Zky833XX8jTdTJiQa1Z75JH4DWT16+e+//51y8+a5d6xY6xDfU45Jdbpqady4154IX4L06e7n3xyfC8bbOD+ySd1P9vSifC88yK2yy9v2eW4x+82+9tqbStWxPdfaAfbRpQsuQP3AHOAFcDHwHHAScBJmekGXAd8ALxNPe3t+a92n9xL5bHH3C+8sO64iy7Ktd1OmhSJZPFi986d3U87rW7ZPfd033HHuuNGj3Y/8sio6db2xBMx3+9/PxLie+/FDuOAA+LHPHFi3Rqne7Tjd+tW+Mhi1iz3v/+9bu151qxo/z/zzNXLL1vm/r3vud9xR4NfiS9eHEcRM2ZEvBdfHPFtt517ZWWu3D/+EbFVVLg/+WTD8yyVlSvd//Sn2IHVd3R05plRy16ypG6SKHau4uuvY2e+zz65cXvt5b755rkd9bRp8Tv4z//MlckeGTzyiPvddzd8dFf7XE5S48blzvs8+2zjP99Y2e0OcT6qNWUrEI880rrLbYSS1txb4qXk3oA334xNk59oBw2KGnn2H/SzzyLJnH9+svnuvXc0qfz855EALrnEfcMNo8nH3f2kkyJxfPpp7jNnnRX/2FOnJo//l7907949ktVZZzXvH2W33eJo4MUX4zu56aa606dOjaRv5j5iRNOX4x7fyYsvRu305ZdXr73Nneu+334RxxFHrL7zzHryySgzdmzEVFlZvCnq/PPj6OzPf/Z/N5FlfwdXXFG37LBh7p06uVdXRzPJFlvEdzBqVJS/4YbCy7j66tgRvvdeoq/D3eOIYcMNo0LQWk0y7lGpgag8tJaamlwz6YEHtt5yG0nJvT2rqcnVlN59Nzd+5Ej/d7vtihXuxx0Xw2+8UXg+EybkDtdfey3K/uUvdcvUbtKYOjXKDBuWG7fPPpHIGuPdd6MJZ8GCXHt7/vrVV7scPTpqxtnzF9deGzH9+c/u/ftHbTjfkiXRZJFNWk8+GUc4SZJRdke5fLn7ppvmaowQCfPRR2P6ww9Hc0iXLpFwGjps/+abOCI65ZRIElttFeW/+Saa2PKbZt55J7eOX37p3qNHnLf49a/jyCR/J/LNN1GDd4+eUuD+/POxjB//OJLxvHl1PzNmTO43deWVq8e8dGnhdXnoofheqqtj/pMnR3NkqeR/j59+6v7FFzG+Vy/3ww4r3bIKLa+25cvdhw+PSlD+/14bouTe3vXvv/oPbOHCaLf9/e8jSUC0hxf6wS5bFrW0Qw6J4RNOiNr04sUNL3fgwFhGtjY/c2bdmnxjPPxwLvFkTZ8eyeKeewp/5mc/i2aI7DrNnRtHJ+eem3y5118fnznwwEiEhUyd6n7wwXWbQC691P2++3LNTf37507qTpjg/pvfRNNVEscfHydaO3aMo5esHXeMo5Hazjsv4s22o7/6apw07dRp9Wa42rI7haOPzo2bMiWWeeSRuR3hhAmxk9h558KJ+euvY6dw1FG57V5dnZue3UkuXRo7tzPOSPYd1GfWrNjBHXJI/BbeeSc37eST3TfeOGJ68033RYuatyz3mM9JJ7l/5zvxP/HMMw2XnzMnfm9N/d23MCX39m7mzOidkO+WW+KQ9YIL6j/8zrrwwtjEkydHrWTChOLLffzx+Exzt8+8ebkacO0Eu2KF+/rrRzIZPjyaCrI9SJYvj144Q4fWndcBB9RtY07i5ptj2YcdtvqJ5TvvjGTXo0ck3oZqc805sXb77bkjrawLLogadDZxrFoVzSr5zQAvv+zet2/dJJvvttuimS2/X/3ZZ8dys9/ZqadGLbj2UVp+4rr//jhP0LVrfGdrreX+z3+uvsyBA9232abB1W7Q7NnuG20U8W21VRxJ1t6Rd+vmfuyxTZ9/IaNGRW+vQYOi2aVz5zhCrO2TT6LJrzWbnppIyV2iTb5r17o1u2JqaqLnR5IdQUOWL4+fV8+eq0878kiv0/yRrdk/+2wMP/hg3fJN7e541VUxv5NPzo0bOjTG7bFHrpbaUgYPjqRS+yTmW2/F8rPnB557LobvvHP1zxfbsdTUFG5OWbUqmqayO5VVq+qemLz//jgqqKqKpqNsop8+PRI7xJFHoSaw4cNj+vTpDcdWX7wHHhjdWfN7l11/fST2bG+orMsvjx11U2R7My1bFr9H9/if6N8/jtJqy1aEsjvTmppoyho7tmnLbkFK7hKOPz4280MPtf6yp08v3E1x1apoYlqwIP7Zskmsd++I9YsvShfDn/7k/tOf5oa//e2o2Tbn6uKkDj00mlxqq6mJayLWWSe+h3/8w33rrQsn0paSvW5hvfXi+37iibrTFy6s/7PZpqBiR42FrFoVzTGFTpI+/3wc0eU3We2+e9OOIj/5JI7M8s8xuceRZPY3N2JEnNfp1avu0VNNTSy3X7821y1SyV3CrFmR3GbMKHckxWUvdCq12ucZGkpcrWXChLoJrhzJ45xz/N/nbBoju3MaPLjhcuPHx/mFPfeMpsRi53rco1kmv1dR7W7BSTz0UJxsHjgwdqDFTopmz13B6k01t94a4++6K9myW4mSu4jUb+nSOGpoyhHMW281fKKzpib65/fuHecNssmz0DmkYsaPj8/efnvh6dOnR/fUjz6KdenRI7e8hi7Mqx3r66/HxXL51wCsWBEXya2/fml7CDVT0uRuUbb1VVZWelVVVVmWLSLNtHJlPFD+88/jyWQbbBCPkTz0UBgyJJ4tsPbacf+kV16Je+sMGgT77de45dTUxLON118f3n8/xh15ZCxz003h4YcjltGjYa+9YM6ceAzm4sUwbFjxZyIXM2NGPGdh223j0ZkdOzZvfiVgZuPdvbJYufJHKiLtT8eO8fD3rl1hq60i2U6aBDvtFNN79cqV3XXXeDXFWmvFTfEmTsyN69AhbnZXVQXf+Q7cdRf07RvTevaM216XyhZbwC23xM3h2kBibwzV3EVE2pGkNfc19xmqIiIppuQuIpJCSu4iIimk5C4ikkJK7iIiKaTkLiKSQkruIiIppOQuIpJCiZK7mQ00s3fNrNrMzi4w/Vgzm29mEzKv40sfqoiIJFX0eloz60A8AHt/4gHZb5jZGHefmlf0Pnc/tQViFBGRRkpSc+8PVLv7dHdfDtwLDG7ZsEREpDmSJPdewKxawx9nxuU7zMwmmdkDZrZZSaITEZEmKdUJ1UeAPu7+feBJYGShQmY21MyqzKxq/vz5JVq0iIjkS5LcZwO1a+K9M+P+zd0XuPuyzOBNwM6FZuTuI9y90t0rKyoqmhKviIgkkCS5vwH0NbMtzWxtYAgwpnYBM+tZa3AQMK10IYqISGMV7S3j7ivN7FTgCaADcIu7TzGzYcTjnsYAp5nZIGAlsBA4tgVjFhGRIvSwDhGRdkQP6xARWYMpuYuIpJCSu4hICim5i4ikkJK7iEgKKbmLiKSQkruISAopuYuIpJCSu4hICim5i4ikkJK7iEgKKbmLiKSQkruISAopuYuIpJCSu4hICim5i4ikkJK7iEgKKbmLiKSQkruISAolSu5mNtDM3jWzajM7u8D0zmZ2X2b6a2bWp9SBiohIckWTu5l1AK4DDgL6Ab8ys355xY4DPnf37wJXAZeVOlAREUkuSc29P1Dt7tPdfTlwLzA4r8xgYGTm/QPAvmZmpQtTREQao2OCMr2AWbWGPwZ+VF8Zd19pZouAHsBntQuZ2VBgaGZwmZlNbkrQ7djG5H0nawCt85pB69x6tkhSKElyLxl3HwGMADCzKnevbM3ll5vWec2gdV4ztPV1TtIsMxvYrNZw78y4gmXMrCOwAbCgFAGKiEjjJUnubwB9zWxLM1sbGAKMySszBjgm8/5w4Gl399KFKSIijVG0WSbThn4q8ATQAbjF3aeY2TCgyt3HADcDd5hZNbCQ2AEUM6IZcbdXWuc1g9Z5zdCm19lUwRYRSR9doSoikkJK7iIiKVSW5F7sdgZpYGabmdkzZjbVzKaY2emZ8RuZ2ZNm9n7m74bljrWUzKyDmb1lZmMzw1tmbklRnblFxdrljrGUzKy7mT1gZu+Y2TQz22UN2Ma/y/ymJ5vZPWbWJW3b2cxuMbN5ta/FqW+7Wrgms+6TzOwH5Ys8p9WTe8LbGaTBSuBMd+8HDABOyazn2cBT7t4XeCoznCanA9NqDV8GXJW5NcXnxK0q0uRq4HF33xbYgVj31G5jM+sFnAZUuvv2RCeLIaRvO98GDMwbV992PQjom3kNBW5opRgbVI6ae5LbGbR77j7H3d/MvP+S+KfvRd1bNYwEflaeCEvPzHoDPwFuygwbsA9xSwpI3/puAOxJ9BbD3Ze7+xekeBtndATWyVzT0hWYQ8q2s7s/T/T8q62+7ToYuN3Dq0B3M+vZOpHWrxzJvdDtDHqVIY5Wk7lL5k7Aa8Am7j4nM2kusEmZwmoJw4GzgJrMcA/gC3dfmRlO27beEpgP3JppirrJzLqR4m3s7rOBPwMziaS+CBhPurdzVn3btU3mNJ15dPedAAABqElEQVRQbWFmti7wIHCGuy+uPS1zoVcq+qKa2SHAPHcfX+5YWlFH4AfADe6+E/AVeU0wadrGAJl25sHEju3bQDdWb75IvfawXcuR3JPcziAVzKwTkdjvcvdRmdGfZg/ZMn/nlSu+EtsNGGRmHxFNbfsQ7dHdM4fvkL5t/THwsbu/lhl+gEj2ad3GAPsBH7r7fHdfAYwitn2at3NWfdu1Tea0ciT3JLczaPcy7c03A9Pc/cpak2rfquEYYHRrx9YS3P0cd+/t7n2Ibfq0ux8JPEPckgJStL4A7j4XmGVm22RG7QtMJaXbOGMmMMDMumZ+49l1Tu12rqW+7ToGODrTa2YAsKhW8035uHurv4CDgfeAD4DzyhFDK6zj7sRh2yRgQuZ1MNEO/RTwPvAvYKNyx9oC6743MDbz/jvA60A18A+gc7njK/G67ghUZbbzw8CGad/GwIXAO8Bk4A6gc9q2M3APcU5hBXGEdlx92xUwogfgB8DbRE+isq+Dbj8gIpJCOqEqIpJCSu4iIimk5C4ikkJK7iIiKaTkLiKSQkruIiIppOQuIpJC/w9HlyYRyqvynQAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "100" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Plot loss and accuracy por 100 episodes, Loss is decrasing and accuracy is growing during the first 100 episodes \n", "episodes = list(range(0, 100))\n", "\n", "loss_Bolzman = [2.750,1.9326,1.599,1.4933,1.3641,1.3544,1.2858,1.2228,1.2695,1.165,1.2079,\n", " 1.0799,1.1265,1.242,1.2760,1.219,0.966,0.9580,1.0700,1.1492,1.1680,1.0672,\n", " 0.9353,1.0579,0.8656,1.0098,0.837,0.8859,1.0678,0.9264,0.8049,0.9255,0.8928,\n", " 0.9739,0.8459,0.8857,0.9969,0.9506,0.9345,0.960219,0.831869,1.0556, 0.8367,\n", " 0.8366,0.8056,1.006,0.8444,0.983,0.9342,0.8919,0.8765,0.8173,0.8173,0.8597,\n", " 0.877,0.8935,0.8714,0.8827,0.9247,0.9506,1.0173,0.8194,0.9933,0.8126,0.8047,\n", " 0.9586,0.8423,0.969,0.9432,0.8538,0.9458,0.8530,0.8371,0.9035,0.7926,0.8675,\n", " 0.8354,0.8754, 0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,\n", " 0.9470,0.7485,0.7522,0.7466,0.7914,0.7704,0.8512,0.8665,0.8145,0.7918,0.9274,\n", " 0.7951, 0.7180]\n", "\n", "plt.plot(episodes, loss_Bolzman, 'r--')\n", "plt.axis([0, 110, 0, 4])\n", "plt.show()\n", "len(loss)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Testing for 10 episodes ...\n", "Episode 1: reward: 210.000, steps: 488\n", "Episode 2: reward: 210.000, steps: 502\n", "Episode 3: reward: 210.000, steps: 490\n", "Episode 4: reward: 210.000, steps: 499\n", "Episode 5: reward: 210.000, steps: 499\n", "Episode 6: reward: 210.000, steps: 507\n", "Episode 7: reward: 210.000, steps: 502\n", "Episode 8: reward: 210.000, steps: 508\n", "Episode 9: reward: 210.000, steps: 498\n", "Episode 10: reward: 210.000, steps: 490\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Evaluate the algorithm for 10 episodes \n", "dqn.test(env, nb_episodes=10, visualize=True)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "## Save the weights of an agent from a HDF5 file\n", "dqn.save_weights('dqn_{}_weights.h5f'.format(env), overwrite=True)\n", "## Load the weights of an agent from an HDF5 file \n", "dqn.load_weights('dqn_{}_weights.h5f')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Information about the agent (https://github.com/matthiasplappert/keras-rl/blob/master/rl/core.py)\n", "dqn.metrics_names()\n", "dqn.layers()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Conclusion DQN-Agent\n", "\n", "#### The DQN agent with Neural Network model and Boltzmann Gumbel Q Policy\n", "#### shows stable learning during training the first 100 episodes. Mean-reward grows in the test from the beggining \n", "#### The test reward comes the highest of the experiment " ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training for 100000 steps ...\n", " 966/100000: episode: 1, duration: 19.852s, episode steps: 966, steps per second: 49, episode reward: 440.000, mean reward: 0.455 [0.000, 200.000], mean action: 7.621 [0.000, 8.000], mean observation: 72.770 [0.000, 228.000], loss: 23.349495, mean_absolute_error: 0.133976, acc: 0.910995, mean_q: 0.321679\n", " 2220/100000: episode: 2, duration: 20.781s, episode steps: 1254, steps per second: 60, episode reward: 440.000, mean reward: 0.351 [0.000, 200.000], mean action: 7.652 [0.000, 8.000], mean observation: 72.743 [0.000, 228.000], loss: 17.760113, mean_absolute_error: 0.094084, acc: 0.919393, mean_q: 0.646693\n", " 3085/100000: episode: 3, duration: 7.325s, episode steps: 865, steps per second: 118, episode reward: 460.000, mean reward: 0.532 [0.000, 200.000], mean action: 7.603 [0.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 25.900519, mean_absolute_error: 0.109459, acc: 0.906250, mean_q: 0.724962\n", " 3920/100000: episode: 4, duration: 6.853s, episode steps: 835, steps per second: 122, episode reward: 480.000, mean reward: 0.575 [0.000, 200.000], mean action: 7.686 [0.000, 8.000], mean observation: 72.701 [0.000, 228.000], loss: 26.829130, mean_absolute_error: 0.104061, acc: 0.926859, mean_q: 0.794797\n", " 4924/100000: episode: 5, duration: 8.052s, episode steps: 1004, steps per second: 125, episode reward: 240.000, mean reward: 0.239 [0.000, 50.000], mean action: 7.601 [0.000, 8.000], mean observation: 72.781 [0.000, 228.000], loss: 2.223164, mean_absolute_error: 0.064898, acc: 0.913260, mean_q: 0.863236\n", " 5799/100000: episode: 6, duration: 7.158s, episode steps: 875, steps per second: 122, episode reward: 440.000, mean reward: 0.503 [0.000, 200.000], mean action: 7.543 [0.000, 8.000], mean observation: 72.829 [0.000, 228.000], loss: 25.469583, mean_absolute_error: 0.096295, acc: 0.898169, mean_q: 0.897363\n", " 6678/100000: episode: 7, duration: 7.295s, episode steps: 879, steps per second: 120, episode reward: 840.000, mean reward: 0.956 [0.000, 400.000], mean action: 7.575 [0.000, 8.000], mean observation: 72.777 [0.000, 228.000], loss: 116.441804, mean_absolute_error: 0.142503, acc: 0.902050, mean_q: 0.919481\n", " 7795/100000: episode: 8, duration: 8.628s, episode steps: 1117, steps per second: 129, episode reward: 250.000, mean reward: 0.224 [0.000, 50.000], mean action: 7.655 [0.000, 8.000], mean observation: 72.716 [0.000, 228.000], loss: 2.105452, mean_absolute_error: 0.055605, acc: 0.920251, mean_q: 0.946919\n", " 9231/100000: episode: 9, duration: 11.622s, episode steps: 1436, steps per second: 124, episode reward: 360.000, mean reward: 0.251 [0.000, 50.000], mean action: 7.570 [0.000, 8.000], mean observation: 72.549 [0.000, 228.000], loss: 2.009200, mean_absolute_error: 0.060428, acc: 0.905226, mean_q: 0.952722\n", " 10253/100000: episode: 10, duration: 8.108s, episode steps: 1022, steps per second: 126, episode reward: 240.000, mean reward: 0.235 [0.000, 50.000], mean action: 7.652 [0.000, 8.000], mean observation: 72.822 [0.000, 228.000], loss: 2.237449, mean_absolute_error: 0.055788, acc: 0.915769, mean_q: 0.958375\n", " 11275/100000: episode: 11, duration: 8.014s, episode steps: 1022, steps per second: 128, episode reward: 490.000, mean reward: 0.479 [0.000, 200.000], mean action: 7.599 [0.000, 8.000], mean observation: 72.781 [0.000, 228.000], loss: 22.073673, mean_absolute_error: 0.085744, acc: 0.907933, mean_q: 0.971059\n", " 11984/100000: episode: 12, duration: 5.775s, episode steps: 709, steps per second: 123, episode reward: 250.000, mean reward: 0.353 [0.000, 50.000], mean action: 7.626 [0.000, 8.000], mean observation: 72.810 [0.000, 228.000], loss: 3.251150, mean_absolute_error: 0.069292, acc: 0.911017, mean_q: 0.977070\n", " 12855/100000: episode: 13, duration: 7.077s, episode steps: 871, steps per second: 123, episode reward: 440.000, mean reward: 0.505 [0.000, 200.000], mean action: 7.636 [0.000, 8.000], mean observation: 72.786 [0.000, 228.000], loss: 25.574995, mean_absolute_error: 0.084563, acc: 0.914943, mean_q: 0.981379\n", " 13990/100000: episode: 14, duration: 9.386s, episode steps: 1135, steps per second: 121, episode reward: 1650.000, mean reward: 1.454 [0.000, 800.000], mean action: 7.607 [0.000, 8.000], mean observation: 72.837 [0.000, 228.000], loss: 371.897181, mean_absolute_error: 0.188477, acc: 0.919753, mean_q: 0.990182\n", " 15105/100000: episode: 15, duration: 14.405s, episode steps: 1115, steps per second: 77, episode reward: 850.000, mean reward: 0.762 [0.000, 400.000], mean action: 7.587 [0.000, 8.000], mean observation: 72.805 [0.000, 228.000], loss: 91.851788, mean_absolute_error: 0.114778, acc: 0.905745, mean_q: 0.992449\n", " 15901/100000: episode: 16, duration: 13.268s, episode steps: 796, steps per second: 60, episode reward: 520.000, mean reward: 0.653 [0.000, 200.000], mean action: 7.560 [0.000, 8.000], mean observation: 72.665 [0.000, 228.000], loss: 28.777944, mean_absolute_error: 0.102847, acc: 0.905660, mean_q: 0.993742\n", " 16892/100000: episode: 17, duration: 16.525s, episode steps: 991, steps per second: 60, episode reward: 530.000, mean reward: 0.535 [0.000, 250.000], mean action: 7.529 [0.000, 8.000], mean observation: 72.740 [0.000, 228.000], loss: 33.318019, mean_absolute_error: 0.092642, acc: 0.893939, mean_q: 0.994124\n", " 17419/100000: episode: 18, duration: 8.805s, episode steps: 527, steps per second: 60, episode reward: 120.000, mean reward: 0.228 [0.000, 10.000], mean action: 7.518 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 1.243997, mean_absolute_error: 0.056959, acc: 0.897338, mean_q: 0.994407\n", " 18320/100000: episode: 19, duration: 15.112s, episode steps: 901, steps per second: 60, episode reward: 440.000, mean reward: 0.488 [0.000, 200.000], mean action: 7.604 [0.000, 8.000], mean observation: 72.822 [0.000, 228.000], loss: 24.761886, mean_absolute_error: 0.082639, acc: 0.911111, mean_q: 0.995430\n", " 19158/100000: episode: 20, duration: 6.628s, episode steps: 838, steps per second: 126, episode reward: 440.000, mean reward: 0.525 [0.000, 200.000], mean action: 7.530 [0.000, 8.000], mean observation: 72.818 [0.000, 228.000], loss: 26.591505, mean_absolute_error: 0.089191, acc: 0.902031, mean_q: 0.996318\n", " 20011/100000: episode: 21, duration: 6.920s, episode steps: 853, steps per second: 123, episode reward: 460.000, mean reward: 0.539 [0.000, 200.000], mean action: 7.614 [0.000, 8.000], mean observation: 72.760 [0.000, 228.000], loss: 26.203765, mean_absolute_error: 0.088394, acc: 0.910798, mean_q: 0.997042\n", " 20943/100000: episode: 22, duration: 7.347s, episode steps: 932, steps per second: 127, episode reward: 250.000, mean reward: 0.268 [0.000, 50.000], mean action: 7.655 [0.000, 8.000], mean observation: 72.785 [0.000, 228.000], loss: 2.483114, mean_absolute_error: 0.054482, acc: 0.921590, mean_q: 0.997791\n", " 21510/100000: episode: 23, duration: 4.377s, episode steps: 567, steps per second: 130, episode reward: 170.000, mean reward: 0.300 [0.000, 10.000], mean action: 7.566 [0.000, 8.000], mean observation: 72.784 [0.000, 228.000], loss: 1.587112, mean_absolute_error: 0.063345, acc: 0.908127, mean_q: 0.997974\n", " 22501/100000: episode: 24, duration: 7.652s, episode steps: 991, steps per second: 130, episode reward: 440.000, mean reward: 0.444 [0.000, 200.000], mean action: 7.605 [0.000, 8.000], mean observation: 72.749 [0.000, 228.000], loss: 22.501602, mean_absolute_error: 0.074465, acc: 0.918182, mean_q: 0.998464\n", " 23134/100000: episode: 25, duration: 4.901s, episode steps: 633, steps per second: 129, episode reward: 210.000, mean reward: 0.332 [0.000, 10.000], mean action: 7.651 [0.000, 8.000], mean observation: 72.756 [0.000, 228.000], loss: 1.724502, mean_absolute_error: 0.061043, acc: 0.924051, mean_q: 0.998674\n", " 24017/100000: episode: 26, duration: 6.855s, episode steps: 883, steps per second: 129, episode reward: 840.000, mean reward: 0.951 [0.000, 400.000], mean action: 7.701 [0.000, 8.000], mean observation: 72.812 [0.000, 228.000], loss: 115.891073, mean_absolute_error: 0.126790, acc: 0.933107, mean_q: 0.999062\n", " 25116/100000: episode: 27, duration: 8.704s, episode steps: 1099, steps per second: 126, episode reward: 240.000, mean reward: 0.218 [0.000, 50.000], mean action: 7.641 [0.000, 8.000], mean observation: 72.795 [0.000, 228.000], loss: 2.091278, mean_absolute_error: 0.052662, acc: 0.912568, mean_q: 0.999349\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 25683/100000: episode: 28, duration: 4.560s, episode steps: 567, steps per second: 124, episode reward: 190.000, mean reward: 0.335 [0.000, 10.000], mean action: 7.630 [0.000, 8.000], mean observation: 72.795 [0.000, 228.000], loss: 1.711272, mean_absolute_error: 0.066766, acc: 0.904594, mean_q: 0.999385\n", " 26702/100000: episode: 29, duration: 7.994s, episode steps: 1019, steps per second: 127, episode reward: 450.000, mean reward: 0.442 [0.000, 210.000], mean action: 7.610 [0.000, 8.000], mean observation: 72.756 [0.000, 228.000], loss: 23.866865, mean_absolute_error: 0.076911, acc: 0.910609, mean_q: 0.999505\n", " 27526/100000: episode: 30, duration: 6.403s, episode steps: 824, steps per second: 129, episode reward: 520.000, mean reward: 0.631 [0.000, 200.000], mean action: 7.714 [0.000, 8.000], mean observation: 72.666 [0.000, 228.000], loss: 27.502925, mean_absolute_error: 0.091450, acc: 0.935601, mean_q: 0.999648\n", " 28375/100000: episode: 31, duration: 6.663s, episode steps: 849, steps per second: 127, episode reward: 440.000, mean reward: 0.518 [0.000, 200.000], mean action: 7.669 [0.000, 8.000], mean observation: 72.782 [0.000, 228.000], loss: 26.230695, mean_absolute_error: 0.081821, acc: 0.925708, mean_q: 0.999724\n", " 29178/100000: episode: 32, duration: 8.344s, episode steps: 803, steps per second: 96, episode reward: 290.000, mean reward: 0.361 [0.000, 50.000], mean action: 7.578 [0.000, 8.000], mean observation: 72.741 [0.000, 228.000], loss: 3.113726, mean_absolute_error: 0.070727, acc: 0.899002, mean_q: 0.999793\n", " 30010/100000: episode: 33, duration: 6.812s, episode steps: 832, steps per second: 122, episode reward: 840.000, mean reward: 1.010 [0.000, 400.000], mean action: 7.683 [0.000, 8.000], mean observation: 72.794 [0.000, 228.000], loss: 122.562868, mean_absolute_error: 0.135093, acc: 0.930205, mean_q: 0.999827\n", " 30987/100000: episode: 34, duration: 8.184s, episode steps: 977, steps per second: 119, episode reward: 840.000, mean reward: 0.860 [0.000, 400.000], mean action: 7.513 [0.000, 8.000], mean observation: 72.750 [0.000, 228.000], loss: 104.393781, mean_absolute_error: 0.128820, acc: 0.894467, mean_q: 0.999898\n", " 31840/100000: episode: 35, duration: 6.706s, episode steps: 853, steps per second: 127, episode reward: 500.000, mean reward: 0.586 [0.000, 200.000], mean action: 7.594 [0.000, 8.000], mean observation: 72.674 [0.000, 228.000], loss: 26.466181, mean_absolute_error: 0.092597, acc: 0.906103, mean_q: 0.999918\n", " 32373/100000: episode: 36, duration: 4.210s, episode steps: 533, steps per second: 127, episode reward: 140.000, mean reward: 0.263 [0.000, 10.000], mean action: 7.525 [0.000, 8.000], mean observation: 72.866 [0.000, 228.000], loss: 1.362516, mean_absolute_error: 0.058625, acc: 0.907895, mean_q: 0.999926\n", " 33181/100000: episode: 37, duration: 6.368s, episode steps: 808, steps per second: 127, episode reward: 450.000, mean reward: 0.557 [0.000, 200.000], mean action: 7.551 [0.000, 8.000], mean observation: 72.751 [0.000, 228.000], loss: 27.561425, mean_absolute_error: 0.093955, acc: 0.897150, mean_q: 0.999936\n", " 33935/100000: episode: 38, duration: 5.940s, episode steps: 754, steps per second: 127, episode reward: 240.000, mean reward: 0.318 [0.000, 50.000], mean action: 7.614 [0.000, 8.000], mean observation: 72.769 [0.000, 228.000], loss: 3.023288, mean_absolute_error: 0.063794, acc: 0.908367, mean_q: 0.999946\n", " 34732/100000: episode: 39, duration: 6.283s, episode steps: 797, steps per second: 127, episode reward: 240.000, mean reward: 0.301 [0.000, 50.000], mean action: 7.621 [0.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 2.833605, mean_absolute_error: 0.063442, acc: 0.905779, mean_q: 0.999952\n", " 35523/100000: episode: 40, duration: 6.237s, episode steps: 791, steps per second: 127, episode reward: 250.000, mean reward: 0.316 [0.000, 50.000], mean action: 7.716 [0.000, 8.000], mean observation: 72.799 [0.000, 228.000], loss: 2.894485, mean_absolute_error: 0.056825, acc: 0.934177, mean_q: 0.999958\n", " 36105/100000: episode: 41, duration: 4.556s, episode steps: 582, steps per second: 128, episode reward: 150.000, mean reward: 0.258 [0.000, 10.000], mean action: 7.586 [0.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 1.353258, mean_absolute_error: 0.057389, acc: 0.905336, mean_q: 0.999964\n", " 36952/100000: episode: 42, duration: 6.637s, episode steps: 847, steps per second: 128, episode reward: 440.000, mean reward: 0.519 [0.000, 50.000], mean action: 7.530 [0.000, 8.000], mean observation: 72.594 [0.000, 228.000], loss: 3.869566, mean_absolute_error: 0.090140, acc: 0.892435, mean_q: 0.999973\n", " 37835/100000: episode: 43, duration: 6.949s, episode steps: 883, steps per second: 127, episode reward: 240.000, mean reward: 0.272 [0.000, 50.000], mean action: 7.652 [0.000, 8.000], mean observation: 72.839 [0.000, 228.000], loss: 2.552832, mean_absolute_error: 0.056240, acc: 0.916100, mean_q: 0.999982\n", " 38553/100000: episode: 44, duration: 5.641s, episode steps: 718, steps per second: 127, episode reward: 320.000, mean reward: 0.446 [0.000, 50.000], mean action: 7.653 [0.000, 8.000], mean observation: 72.705 [0.000, 228.000], loss: 3.684632, mean_absolute_error: 0.076542, acc: 0.914923, mean_q: 0.999985\n", " 39438/100000: episode: 45, duration: 6.912s, episode steps: 885, steps per second: 128, episode reward: 450.000, mean reward: 0.508 [0.000, 200.000], mean action: 7.605 [0.000, 8.000], mean observation: 72.841 [0.000, 228.000], loss: 25.235985, mean_absolute_error: 0.084998, acc: 0.911765, mean_q: 0.999990\n", " 40406/100000: episode: 46, duration: 8.010s, episode steps: 968, steps per second: 121, episode reward: 500.000, mean reward: 0.517 [0.000, 200.000], mean action: 7.512 [0.000, 8.000], mean observation: 72.763 [0.000, 228.000], loss: 23.345646, mean_absolute_error: 0.090591, acc: 0.890383, mean_q: 0.999993\n", " 41195/100000: episode: 47, duration: 6.431s, episode steps: 789, steps per second: 123, episode reward: 840.000, mean reward: 1.065 [0.000, 400.000], mean action: 7.549 [0.000, 8.000], mean observation: 72.821 [0.000, 228.000], loss: 129.524892, mean_absolute_error: 0.149651, acc: 0.901015, mean_q: 0.999995\n", " 42077/100000: episode: 48, duration: 7.383s, episode steps: 882, steps per second: 119, episode reward: 240.000, mean reward: 0.272 [0.000, 50.000], mean action: 7.596 [0.000, 8.000], mean observation: 72.816 [0.000, 228.000], loss: 2.581690, mean_absolute_error: 0.060674, acc: 0.906924, mean_q: 0.999996\n", " 42820/100000: episode: 49, duration: 6.010s, episode steps: 743, steps per second: 124, episode reward: 320.000, mean reward: 0.431 [0.000, 50.000], mean action: 7.661 [0.000, 8.000], mean observation: 72.792 [0.000, 228.000], loss: 3.586110, mean_absolute_error: 0.073265, acc: 0.921833, mean_q: 0.999997\n", " 43785/100000: episode: 50, duration: 8.156s, episode steps: 965, steps per second: 118, episode reward: 840.000, mean reward: 0.870 [0.000, 400.000], mean action: 7.511 [0.000, 8.000], mean observation: 72.791 [0.000, 228.000], loss: 106.296034, mean_absolute_error: 0.128120, acc: 0.900415, mean_q: 0.999997\n", " 44492/100000: episode: 51, duration: 5.798s, episode steps: 707, steps per second: 122, episode reward: 340.000, mean reward: 0.481 [0.000, 50.000], mean action: 7.644 [0.000, 8.000], mean observation: 72.764 [0.000, 228.000], loss: 3.898969, mean_absolute_error: 0.081017, acc: 0.915014, mean_q: 0.999998\n", " 45008/100000: episode: 52, duration: 4.049s, episode steps: 516, steps per second: 127, episode reward: 110.000, mean reward: 0.213 [0.000, 10.000], mean action: 7.616 [0.000, 8.000], mean observation: 72.874 [0.000, 228.000], loss: 1.149696, mean_absolute_error: 0.052781, acc: 0.908738, mean_q: 0.999998\n", " 45864/100000: episode: 53, duration: 6.788s, episode steps: 856, steps per second: 126, episode reward: 450.000, mean reward: 0.526 [0.000, 200.000], mean action: 7.654 [0.000, 8.000], mean observation: 72.777 [0.000, 228.000], loss: 26.111743, mean_absolute_error: 0.086688, acc: 0.912281, mean_q: 0.999998\n", " 46447/100000: episode: 54, duration: 4.697s, episode steps: 583, steps per second: 124, episode reward: 250.000, mean reward: 0.429 [0.000, 10.000], mean action: 7.628 [0.000, 8.000], mean observation: 72.879 [0.000, 228.000], loss: 2.198980, mean_absolute_error: 0.072940, acc: 0.919244, mean_q: 0.999999\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 46962/100000: episode: 55, duration: 3.957s, episode steps: 515, steps per second: 130, episode reward: 110.000, mean reward: 0.214 [0.000, 10.000], mean action: 7.555 [0.000, 8.000], mean observation: 72.866 [0.000, 228.000], loss: 1.142107, mean_absolute_error: 0.055873, acc: 0.900778, mean_q: 0.999999\n", " 47846/100000: episode: 56, duration: 7.034s, episode steps: 884, steps per second: 126, episode reward: 240.000, mean reward: 0.271 [0.000, 50.000], mean action: 7.538 [0.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 2.629218, mean_absolute_error: 0.063533, acc: 0.892412, mean_q: 0.999999\n", " 48636/100000: episode: 57, duration: 6.133s, episode steps: 790, steps per second: 129, episode reward: 920.000, mean reward: 1.165 [0.000, 400.000], mean action: 7.600 [0.000, 8.000], mean observation: 72.732 [0.000, 228.000], loss: 130.072215, mean_absolute_error: 0.158051, acc: 0.910013, mean_q: 0.999999\n", " 49211/100000: episode: 58, duration: 4.527s, episode steps: 575, steps per second: 127, episode reward: 230.000, mean reward: 0.400 [0.000, 10.000], mean action: 7.487 [0.000, 8.000], mean observation: 72.775 [0.000, 228.000], loss: 2.043686, mean_absolute_error: 0.076785, acc: 0.893728, mean_q: 1.000000\n", " 50064/100000: episode: 59, duration: 6.762s, episode steps: 853, steps per second: 126, episode reward: 440.000, mean reward: 0.516 [0.000, 200.000], mean action: 7.653 [0.000, 8.000], mean observation: 72.808 [0.000, 228.000], loss: 26.108275, mean_absolute_error: 0.085424, acc: 0.909624, mean_q: 1.000000\n", " 50907/100000: episode: 60, duration: 6.492s, episode steps: 843, steps per second: 130, episode reward: 280.000, mean reward: 0.332 [0.000, 50.000], mean action: 7.619 [0.000, 8.000], mean observation: 72.713 [0.000, 228.000], loss: 2.913751, mean_absolute_error: 0.061126, acc: 0.918052, mean_q: 1.000000\n", " 52066/100000: episode: 61, duration: 9.202s, episode steps: 1159, steps per second: 126, episode reward: 240.000, mean reward: 0.207 [0.000, 50.000], mean action: 7.677 [0.000, 8.000], mean observation: 72.739 [0.000, 228.000], loss: 1.964234, mean_absolute_error: 0.048993, acc: 0.918826, mean_q: 1.000000\n", " 52669/100000: episode: 62, duration: 4.886s, episode steps: 603, steps per second: 123, episode reward: 250.000, mean reward: 0.415 [0.000, 10.000], mean action: 7.587 [0.000, 8.000], mean observation: 72.773 [0.000, 228.000], loss: 2.122637, mean_absolute_error: 0.074025, acc: 0.911960, mean_q: 1.000000\n", " 53498/100000: episode: 63, duration: 6.810s, episode steps: 829, steps per second: 122, episode reward: 540.000, mean reward: 0.651 [0.000, 200.000], mean action: 7.575 [0.000, 8.000], mean observation: 72.644 [0.000, 228.000], loss: 27.518876, mean_absolute_error: 0.102597, acc: 0.902174, mean_q: 1.000000\n", " 54438/100000: episode: 64, duration: 7.521s, episode steps: 940, steps per second: 125, episode reward: 240.000, mean reward: 0.255 [0.000, 50.000], mean action: 7.545 [0.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 2.387436, mean_absolute_error: 0.059689, acc: 0.902023, mean_q: 1.000000\n", " 55452/100000: episode: 65, duration: 7.977s, episode steps: 1014, steps per second: 127, episode reward: 520.000, mean reward: 0.513 [0.000, 50.000], mean action: 7.628 [0.000, 8.000], mean observation: 72.525 [0.000, 228.000], loss: 3.588693, mean_absolute_error: 0.083786, acc: 0.912142, mean_q: 1.000000\n", " 56369/100000: episode: 66, duration: 7.691s, episode steps: 917, steps per second: 119, episode reward: 840.000, mean reward: 0.916 [0.000, 400.000], mean action: 7.550 [0.000, 8.000], mean observation: 72.793 [0.000, 228.000], loss: 111.591030, mean_absolute_error: 0.129850, acc: 0.909389, mean_q: 1.000000\n", " 57240/100000: episode: 67, duration: 7.270s, episode steps: 871, steps per second: 120, episode reward: 470.000, mean reward: 0.540 [0.000, 200.000], mean action: 7.629 [0.000, 8.000], mean observation: 72.772 [0.000, 228.000], loss: 25.765324, mean_absolute_error: 0.084851, acc: 0.920690, mean_q: 1.000000\n", " 58194/100000: episode: 68, duration: 7.642s, episode steps: 954, steps per second: 125, episode reward: 520.000, mean reward: 0.545 [0.000, 200.000], mean action: 7.621 [0.000, 8.000], mean observation: 72.745 [0.000, 228.000], loss: 23.569458, mean_absolute_error: 0.087988, acc: 0.913956, mean_q: 1.000000\n", " 59060/100000: episode: 69, duration: 6.935s, episode steps: 866, steps per second: 125, episode reward: 840.000, mean reward: 0.970 [0.000, 400.000], mean action: 7.565 [0.000, 8.000], mean observation: 72.795 [0.000, 228.000], loss: 118.224175, mean_absolute_error: 0.137821, acc: 0.905202, mean_q: 1.000000\n", " 59834/100000: episode: 70, duration: 5.999s, episode steps: 774, steps per second: 129, episode reward: 520.000, mean reward: 0.672 [0.000, 200.000], mean action: 7.612 [0.000, 8.000], mean observation: 72.746 [0.000, 228.000], loss: 29.306530, mean_absolute_error: 0.101251, acc: 0.915912, mean_q: 1.000000\n", " 60924/100000: episode: 71, duration: 11.196s, episode steps: 1090, steps per second: 97, episode reward: 240.000, mean reward: 0.220 [0.000, 50.000], mean action: 7.513 [0.000, 8.000], mean observation: 72.826 [0.000, 228.000], loss: 2.112230, mean_absolute_error: 0.055560, acc: 0.898072, mean_q: 1.000000\n", " 61952/100000: episode: 72, duration: 8.290s, episode steps: 1028, steps per second: 124, episode reward: 920.000, mean reward: 0.895 [0.000, 400.000], mean action: 7.568 [0.000, 8.000], mean observation: 72.769 [0.000, 228.000], loss: 99.638470, mean_absolute_error: 0.130471, acc: 0.903603, mean_q: 1.000000\n", " 62842/100000: episode: 73, duration: 6.951s, episode steps: 890, steps per second: 128, episode reward: 840.000, mean reward: 0.944 [0.000, 400.000], mean action: 7.609 [0.000, 8.000], mean observation: 72.804 [0.000, 228.000], loss: 115.731040, mean_absolute_error: 0.134264, acc: 0.908886, mean_q: 1.000000\n", " 63356/100000: episode: 74, duration: 4.006s, episode steps: 514, steps per second: 128, episode reward: 120.000, mean reward: 0.233 [0.000, 10.000], mean action: 7.541 [0.000, 8.000], mean observation: 72.866 [0.000, 228.000], loss: 1.247589, mean_absolute_error: 0.054930, acc: 0.904483, mean_q: 1.000000\n", " 63985/100000: episode: 75, duration: 4.914s, episode steps: 629, steps per second: 128, episode reward: 110.000, mean reward: 0.175 [0.000, 10.000], mean action: 7.650 [0.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.968055, mean_absolute_error: 0.046987, acc: 0.915605, mean_q: 1.000000\n", " 64795/100000: episode: 76, duration: 6.385s, episode steps: 810, steps per second: 127, episode reward: 440.000, mean reward: 0.543 [0.000, 200.000], mean action: 7.648 [0.000, 8.000], mean observation: 72.752 [0.000, 228.000], loss: 27.501371, mean_absolute_error: 0.084056, acc: 0.925834, mean_q: 1.000000\n", " 65724/100000: episode: 77, duration: 7.229s, episode steps: 929, steps per second: 129, episode reward: 450.000, mean reward: 0.484 [0.000, 200.000], mean action: 7.636 [0.000, 8.000], mean observation: 72.823 [0.000, 228.000], loss: 23.860660, mean_absolute_error: 0.078057, acc: 0.921336, mean_q: 1.000000\n", " 66513/100000: episode: 78, duration: 10.277s, episode steps: 789, steps per second: 77, episode reward: 240.000, mean reward: 0.304 [0.000, 50.000], mean action: 7.662 [0.000, 8.000], mean observation: 72.827 [0.000, 228.000], loss: 2.873815, mean_absolute_error: 0.059327, acc: 0.922589, mean_q: 1.000000\n", " 67519/100000: episode: 79, duration: 7.801s, episode steps: 1006, steps per second: 129, episode reward: 850.000, mean reward: 0.845 [0.000, 400.000], mean action: 7.636 [0.000, 8.000], mean observation: 72.835 [0.000, 228.000], loss: 101.992474, mean_absolute_error: 0.119336, acc: 0.918408, mean_q: 1.000000\n", " 68396/100000: episode: 80, duration: 6.807s, episode steps: 877, steps per second: 129, episode reward: 450.000, mean reward: 0.513 [0.000, 200.000], mean action: 7.596 [0.000, 8.000], mean observation: 72.784 [0.000, 228.000], loss: 25.428141, mean_absolute_error: 0.084259, acc: 0.913242, mean_q: 1.000000\n", " 69183/100000: episode: 81, duration: 6.107s, episode steps: 787, steps per second: 129, episode reward: 840.000, mean reward: 1.067 [0.000, 400.000], mean action: 7.682 [0.000, 8.000], mean observation: 72.850 [0.000, 228.000], loss: 130.164832, mean_absolute_error: 0.144570, acc: 0.922392, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 69968/100000: episode: 82, duration: 6.078s, episode steps: 785, steps per second: 129, episode reward: 450.000, mean reward: 0.573 [0.000, 200.000], mean action: 7.618 [0.000, 8.000], mean observation: 72.775 [0.000, 228.000], loss: 28.480391, mean_absolute_error: 0.087428, acc: 0.922194, mean_q: 1.000000\n", " 70897/100000: episode: 83, duration: 7.275s, episode steps: 929, steps per second: 128, episode reward: 840.000, mean reward: 0.904 [0.000, 400.000], mean action: 7.635 [0.000, 8.000], mean observation: 72.838 [0.000, 228.000], loss: 110.639460, mean_absolute_error: 0.127834, acc: 0.917026, mean_q: 1.000000\n", " 71913/100000: episode: 84, duration: 8.216s, episode steps: 1016, steps per second: 124, episode reward: 250.000, mean reward: 0.246 [0.000, 50.000], mean action: 7.594 [0.000, 8.000], mean observation: 72.821 [0.000, 228.000], loss: 2.298804, mean_absolute_error: 0.057479, acc: 0.903448, mean_q: 1.000000\n", " 72995/100000: episode: 85, duration: 8.460s, episode steps: 1082, steps per second: 128, episode reward: 480.000, mean reward: 0.444 [0.000, 50.000], mean action: 7.513 [0.000, 8.000], mean observation: 72.474 [0.000, 228.000], loss: 3.197630, mean_absolute_error: 0.081302, acc: 0.896392, mean_q: 1.000000\n", " 73886/100000: episode: 86, duration: 6.948s, episode steps: 891, steps per second: 128, episode reward: 250.000, mean reward: 0.281 [0.000, 50.000], mean action: 7.590 [0.000, 8.000], mean observation: 72.815 [0.000, 228.000], loss: 2.540349, mean_absolute_error: 0.059831, acc: 0.908989, mean_q: 1.000000\n", " 74673/100000: episode: 87, duration: 6.144s, episode steps: 787, steps per second: 128, episode reward: 440.000, mean reward: 0.559 [0.000, 200.000], mean action: 7.582 [0.000, 8.000], mean observation: 72.830 [0.000, 228.000], loss: 28.320099, mean_absolute_error: 0.090964, acc: 0.908397, mean_q: 1.000000\n", " 75725/100000: episode: 88, duration: 8.263s, episode steps: 1052, steps per second: 127, episode reward: 440.000, mean reward: 0.418 [0.000, 210.000], mean action: 7.647 [0.000, 8.000], mean observation: 72.797 [0.000, 228.000], loss: 22.874603, mean_absolute_error: 0.073542, acc: 0.915319, mean_q: 1.000000\n", " 77082/100000: episode: 89, duration: 10.597s, episode steps: 1357, steps per second: 128, episode reward: 360.000, mean reward: 0.265 [0.000, 50.000], mean action: 7.580 [0.000, 8.000], mean observation: 72.650 [0.000, 228.000], loss: 2.131358, mean_absolute_error: 0.061060, acc: 0.899705, mean_q: 1.000000\n", " 77781/100000: episode: 90, duration: 5.501s, episode steps: 699, steps per second: 127, episode reward: 320.000, mean reward: 0.458 [0.000, 50.000], mean action: 7.597 [0.000, 8.000], mean observation: 72.710 [0.000, 228.000], loss: 3.825074, mean_absolute_error: 0.081441, acc: 0.906877, mean_q: 1.000000\n", " 78863/100000: episode: 91, duration: 8.454s, episode steps: 1082, steps per second: 128, episode reward: 350.000, mean reward: 0.323 [0.000, 50.000], mean action: 7.560 [0.000, 8.000], mean observation: 72.591 [0.000, 228.000], loss: 2.660822, mean_absolute_error: 0.065323, acc: 0.905643, mean_q: 1.000000\n", " 79758/100000: episode: 92, duration: 6.975s, episode steps: 895, steps per second: 128, episode reward: 450.000, mean reward: 0.503 [0.000, 200.000], mean action: 7.515 [0.000, 8.000], mean observation: 72.772 [0.000, 228.000], loss: 25.028459, mean_absolute_error: 0.087384, acc: 0.898210, mean_q: 1.000000\n", " 80623/100000: episode: 93, duration: 6.753s, episode steps: 865, steps per second: 128, episode reward: 330.000, mean reward: 0.382 [0.000, 50.000], mean action: 7.595 [0.000, 8.000], mean observation: 72.696 [0.000, 228.000], loss: 3.137615, mean_absolute_error: 0.072383, acc: 0.901620, mean_q: 1.000000\n", " 81256/100000: episode: 94, duration: 4.939s, episode steps: 633, steps per second: 128, episode reward: 180.000, mean reward: 0.284 [0.000, 10.000], mean action: 7.555 [0.000, 8.000], mean observation: 72.880 [0.000, 228.000], loss: 1.506944, mean_absolute_error: 0.061245, acc: 0.905063, mean_q: 1.000000\n", " 82119/100000: episode: 95, duration: 6.707s, episode steps: 863, steps per second: 129, episode reward: 440.000, mean reward: 0.510 [0.000, 200.000], mean action: 7.622 [0.000, 8.000], mean observation: 72.793 [0.000, 228.000], loss: 25.798052, mean_absolute_error: 0.084959, acc: 0.910673, mean_q: 1.000000\n", " 83096/100000: episode: 96, duration: 7.616s, episode steps: 977, steps per second: 128, episode reward: 850.000, mean reward: 0.870 [0.000, 400.000], mean action: 7.672 [0.000, 8.000], mean observation: 72.816 [0.000, 228.000], loss: 104.618345, mean_absolute_error: 0.120369, acc: 0.922131, mean_q: 1.000000\n", " 83961/100000: episode: 97, duration: 6.750s, episode steps: 865, steps per second: 128, episode reward: 330.000, mean reward: 0.382 [0.000, 50.000], mean action: 7.570 [0.000, 8.000], mean observation: 72.631 [0.000, 228.000], loss: 3.114582, mean_absolute_error: 0.072126, acc: 0.899306, mean_q: 1.000000\n", " 84566/100000: episode: 98, duration: 4.728s, episode steps: 605, steps per second: 128, episode reward: 140.000, mean reward: 0.231 [0.000, 10.000], mean action: 7.633 [0.000, 8.000], mean observation: 72.877 [0.000, 228.000], loss: 1.210112, mean_absolute_error: 0.050493, acc: 0.918874, mean_q: 1.000000\n", " 85462/100000: episode: 99, duration: 7.022s, episode steps: 896, steps per second: 128, episode reward: 490.000, mean reward: 0.547 [0.000, 200.000], mean action: 7.643 [0.000, 8.000], mean observation: 72.731 [0.000, 228.000], loss: 25.314672, mean_absolute_error: 0.088062, acc: 0.913966, mean_q: 1.000000\n", " 86243/100000: episode: 100, duration: 6.134s, episode steps: 781, steps per second: 127, episode reward: 450.000, mean reward: 0.576 [0.000, 200.000], mean action: 7.603 [0.000, 8.000], mean observation: 72.782 [0.000, 228.000], loss: 28.307328, mean_absolute_error: 0.091382, acc: 0.911538, mean_q: 1.000000\n", " 87223/100000: episode: 101, duration: 7.687s, episode steps: 980, steps per second: 127, episode reward: 450.000, mean reward: 0.459 [0.000, 200.000], mean action: 7.545 [0.000, 8.000], mean observation: 72.808 [0.000, 228.000], loss: 22.798168, mean_absolute_error: 0.081012, acc: 0.905005, mean_q: 1.000000\n", " 88124/100000: episode: 102, duration: 7.013s, episode steps: 901, steps per second: 128, episode reward: 450.000, mean reward: 0.499 [0.000, 200.000], mean action: 7.511 [0.000, 8.000], mean observation: 72.826 [0.000, 228.000], loss: 24.825180, mean_absolute_error: 0.090104, acc: 0.890000, mean_q: 1.000000\n", " 88941/100000: episode: 103, duration: 6.389s, episode steps: 817, steps per second: 128, episode reward: 330.000, mean reward: 0.404 [0.000, 50.000], mean action: 7.553 [0.000, 8.000], mean observation: 72.699 [0.000, 228.000], loss: 3.368771, mean_absolute_error: 0.075227, acc: 0.900735, mean_q: 1.000000\n", " 89994/100000: episode: 104, duration: 8.242s, episode steps: 1053, steps per second: 128, episode reward: 840.000, mean reward: 0.798 [0.000, 400.000], mean action: 7.640 [0.000, 8.000], mean observation: 72.799 [0.000, 228.000], loss: 97.199268, mean_absolute_error: 0.116136, acc: 0.910646, mean_q: 1.000000\n", " 90850/100000: episode: 105, duration: 6.707s, episode steps: 856, steps per second: 128, episode reward: 280.000, mean reward: 0.327 [0.000, 50.000], mean action: 7.605 [0.000, 8.000], mean observation: 72.757 [0.000, 228.000], loss: 2.850689, mean_absolute_error: 0.064840, acc: 0.908772, mean_q: 1.000000\n", " 91796/100000: episode: 106, duration: 7.387s, episode steps: 946, steps per second: 128, episode reward: 500.000, mean reward: 0.529 [0.000, 200.000], mean action: 7.489 [0.000, 8.000], mean observation: 72.745 [0.000, 228.000], loss: 24.115241, mean_absolute_error: 0.090460, acc: 0.895238, mean_q: 1.000000\n", " 92631/100000: episode: 107, duration: 6.531s, episode steps: 835, steps per second: 128, episode reward: 850.000, mean reward: 1.018 [0.000, 400.000], mean action: 7.544 [0.000, 8.000], mean observation: 72.760 [0.000, 228.000], loss: 122.685083, mean_absolute_error: 0.146609, acc: 0.890887, mean_q: 1.000000\n", " 93520/100000: episode: 108, duration: 6.946s, episode steps: 889, steps per second: 128, episode reward: 440.000, mean reward: 0.495 [0.000, 200.000], mean action: 7.594 [0.000, 8.000], mean observation: 72.825 [0.000, 228.000], loss: 25.282309, mean_absolute_error: 0.084610, acc: 0.903153, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 94361/100000: episode: 109, duration: 6.602s, episode steps: 841, steps per second: 127, episode reward: 330.000, mean reward: 0.392 [0.000, 50.000], mean action: 7.633 [0.000, 8.000], mean observation: 72.638 [0.000, 228.000], loss: 3.195201, mean_absolute_error: 0.067343, acc: 0.922619, mean_q: 1.000000\n", " 94903/100000: episode: 110, duration: 4.239s, episode steps: 542, steps per second: 128, episode reward: 200.000, mean reward: 0.369 [0.000, 10.000], mean action: 7.683 [0.000, 8.000], mean observation: 72.835 [0.000, 228.000], loss: 1.897125, mean_absolute_error: 0.065492, acc: 0.920518, mean_q: 1.000000\n", " 95480/100000: episode: 111, duration: 4.534s, episode steps: 577, steps per second: 127, episode reward: 150.000, mean reward: 0.260 [0.000, 10.000], mean action: 7.624 [0.000, 8.000], mean observation: 72.871 [0.000, 228.000], loss: 1.386677, mean_absolute_error: 0.058260, acc: 0.909722, mean_q: 1.000000\n", " 96189/100000: episode: 112, duration: 5.534s, episode steps: 709, steps per second: 128, episode reward: 450.000, mean reward: 0.635 [0.000, 200.000], mean action: 7.573 [0.000, 8.000], mean observation: 72.778 [0.000, 228.000], loss: 31.451715, mean_absolute_error: 0.099940, acc: 0.906780, mean_q: 1.000000\n", " 96900/100000: episode: 113, duration: 5.571s, episode steps: 711, steps per second: 128, episode reward: 450.000, mean reward: 0.633 [0.000, 200.000], mean action: 7.615 [0.000, 8.000], mean observation: 72.797 [0.000, 228.000], loss: 31.400907, mean_absolute_error: 0.099200, acc: 0.908451, mean_q: 1.000000\n", " 97995/100000: episode: 114, duration: 8.594s, episode steps: 1095, steps per second: 127, episode reward: 240.000, mean reward: 0.219 [0.000, 50.000], mean action: 7.572 [0.000, 8.000], mean observation: 72.780 [0.000, 228.000], loss: 2.085018, mean_absolute_error: 0.054207, acc: 0.905850, mean_q: 1.000000\n", " 98786/100000: episode: 115, duration: 6.189s, episode steps: 791, steps per second: 128, episode reward: 930.000, mean reward: 1.176 [0.000, 400.000], mean action: 7.575 [0.000, 8.000], mean observation: 72.678 [0.000, 228.000], loss: 129.505934, mean_absolute_error: 0.158702, acc: 0.915190, mean_q: 1.000000\n", " 99578/100000: episode: 116, duration: 6.201s, episode steps: 792, steps per second: 128, episode reward: 240.000, mean reward: 0.303 [0.000, 50.000], mean action: 7.539 [0.000, 8.000], mean observation: 72.811 [0.000, 228.000], loss: 2.884318, mean_absolute_error: 0.063420, acc: 0.901391, mean_q: 1.000000\n", "done, took 890.633 seconds\n", "Testing for 10 episodes ...\n", "Episode 1: reward: 70.000, steps: 631\n", "Episode 2: reward: 70.000, steps: 640\n", "Episode 3: reward: 70.000, steps: 624\n", "Episode 4: reward: 70.000, steps: 616\n", "Episode 5: reward: 70.000, steps: 624\n", "Episode 6: reward: 70.000, steps: 633\n", "Episode 7: reward: 70.000, steps: 629\n", "Episode 8: reward: 70.000, steps: 621\n", "Episode 9: reward: 70.000, steps: 640\n", "Episode 10: reward: 70.000, steps: 615\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#SARSA Agent -- Reinforcement Learning with no policy \n", "from rl.agents.sarsa import SARSAAgent\n", "sarsa = SARSAAgent(model, nb_actions, \n", " policy=None, test_policy=None, \n", " gamma=0.99, nb_steps_warmup=10, \n", " train_interval=1)\n", "sarsa.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])\n", "sarsa.fit(env, nb_steps=100000, visualize=True, verbose=2)\n", "sarsa.test(env, nb_episodes=10, visualize=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Conclusion State-Action-Reward agent\n", "\n", "#### The State-action-Reward agent with Neural Network model and no policy\n", "#### shows unstable learning during episodes. \n", "#### The test reward comes at 70 and the duration of the episode is low ( the agent doesn´t play for much time)\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training for 100000 steps ...\n", " 789/100000: episode: 1, duration: 8.591s, episode steps: 789, steps per second: 92, episode reward: 270.000, mean reward: 0.342 [0.000, 50.000], mean action: 7.024 [0.000, 8.000], mean observation: 72.821 [0.000, 228.000], loss: 3.280811, mean_absolute_error: 0.095194, acc: 0.786632, mean_q: 1.000000\n", " 1790/100000: episode: 2, duration: 7.855s, episode steps: 1001, steps per second: 127, episode reward: 570.000, mean reward: 0.569 [0.000, 200.000], mean action: 7.797 [0.000, 8.000], mean observation: 72.541 [0.000, 228.000], loss: 22.827030, mean_absolute_error: 0.077349, acc: 0.954000, mean_q: 1.000000\n", " 2623/100000: episode: 3, duration: 6.634s, episode steps: 833, steps per second: 126, episode reward: 840.000, mean reward: 1.008 [0.000, 400.000], mean action: 7.819 [0.000, 8.000], mean observation: 72.809 [0.000, 228.000], loss: 122.849101, mean_absolute_error: 0.124602, acc: 0.962740, mean_q: 1.000000\n", " 3374/100000: episode: 4, duration: 5.933s, episode steps: 751, steps per second: 127, episode reward: 110.000, mean reward: 0.146 [0.000, 10.000], mean action: 7.879 [0.000, 8.000], mean observation: 72.877 [0.000, 228.000], loss: 0.744462, mean_absolute_error: 0.025588, acc: 0.973333, mean_q: 1.000000\n", " 4005/100000: episode: 5, duration: 4.980s, episode steps: 631, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.938 [0.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.585303, mean_absolute_error: 0.018325, acc: 0.984127, mean_q: 1.000000\n", " 4851/100000: episode: 6, duration: 6.910s, episode steps: 846, steps per second: 122, episode reward: 440.000, mean reward: 0.520 [0.000, 200.000], mean action: 7.862 [0.000, 8.000], mean observation: 72.819 [0.000, 228.000], loss: 26.284027, mean_absolute_error: 0.068155, acc: 0.970414, mean_q: 1.000000\n", " 5748/100000: episode: 7, duration: 7.205s, episode steps: 897, steps per second: 124, episode reward: 440.000, mean reward: 0.491 [0.000, 200.000], mean action: 7.928 [0.000, 8.000], mean observation: 72.817 [0.000, 228.000], loss: 24.786848, mean_absolute_error: 0.060533, acc: 0.984375, mean_q: 1.000000\n", " 6703/100000: episode: 8, duration: 7.544s, episode steps: 955, steps per second: 127, episode reward: 240.000, mean reward: 0.251 [0.000, 50.000], mean action: 7.938 [0.000, 8.000], mean observation: 72.719 [0.000, 228.000], loss: 2.308288, mean_absolute_error: 0.033744, acc: 0.984277, mean_q: 1.000000\n", " 7332/100000: episode: 9, duration: 4.978s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.976 [2.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.561785, mean_absolute_error: 0.015223, acc: 0.993631, mean_q: 1.000000\n", " 8208/100000: episode: 10, duration: 6.942s, episode steps: 876, steps per second: 126, episode reward: 440.000, mean reward: 0.502 [0.000, 200.000], mean action: 7.962 [0.000, 8.000], mean observation: 72.792 [0.000, 228.000], loss: 25.376075, mean_absolute_error: 0.060074, acc: 0.989714, mean_q: 1.000000\n", " 8783/100000: episode: 11, duration: 4.834s, episode steps: 575, steps per second: 119, episode reward: 140.000, mean reward: 0.243 [0.000, 10.000], mean action: 7.932 [0.000, 8.000], mean observation: 72.878 [0.000, 228.000], loss: 1.250938, mean_absolute_error: 0.033527, acc: 0.982578, mean_q: 1.000000\n", " 9401/100000: episode: 12, duration: 5.140s, episode steps: 618, steps per second: 120, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [5.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.571800, mean_absolute_error: 0.015475, acc: 0.993517, mean_q: 1.000000\n", " 10323/100000: episode: 13, duration: 7.536s, episode steps: 922, steps per second: 122, episode reward: 840.000, mean reward: 0.911 [0.000, 400.000], mean action: 7.959 [1.000, 8.000], mean observation: 72.823 [0.000, 228.000], loss: 110.953515, mean_absolute_error: 0.104428, acc: 0.992400, mean_q: 1.000000\n", " 10948/100000: episode: 14, duration: 4.942s, episode steps: 625, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.971 [0.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.565386, mean_absolute_error: 0.015313, acc: 0.993590, mean_q: 1.000000\n", " 11577/100000: episode: 15, duration: 4.973s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.973 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563362, mean_absolute_error: 0.015748, acc: 0.992038, mean_q: 1.000000\n", " 12204/100000: episode: 16, duration: 4.928s, episode steps: 627, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.981 [2.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.563580, mean_absolute_error: 0.015268, acc: 0.993610, mean_q: 1.000000\n", " 13106/100000: episode: 17, duration: 7.141s, episode steps: 902, steps per second: 126, episode reward: 440.000, mean reward: 0.488 [0.000, 210.000], mean action: 7.966 [1.000, 8.000], mean observation: 72.829 [0.000, 228.000], loss: 26.851471, mean_absolute_error: 0.057764, acc: 0.991121, mean_q: 1.000000\n", " 13841/100000: episode: 18, duration: 5.842s, episode steps: 735, steps per second: 126, episode reward: 150.000, mean reward: 0.204 [0.000, 10.000], mean action: 7.958 [1.000, 8.000], mean observation: 72.865 [0.000, 228.000], loss: 1.032624, mean_absolute_error: 0.028118, acc: 0.985014, mean_q: 1.000000\n", " 14482/100000: episode: 19, duration: 5.068s, episode steps: 641, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.978 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554346, mean_absolute_error: 0.015990, acc: 0.990625, mean_q: 1.000000\n", " 15109/100000: episode: 20, duration: 4.967s, episode steps: 627, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.976 [3.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.565161, mean_absolute_error: 0.015795, acc: 0.992013, mean_q: 1.000000\n", " 15720/100000: episode: 21, duration: 4.823s, episode steps: 611, steps per second: 127, episode reward: 80.000, mean reward: 0.131 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.656919, mean_absolute_error: 0.016375, acc: 0.996721, mean_q: 1.000000\n", " 16345/100000: episode: 22, duration: 4.929s, episode steps: 625, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.986 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563800, mean_absolute_error: 0.014785, acc: 0.995192, mean_q: 1.000000\n", " 16951/100000: episode: 23, duration: 4.810s, episode steps: 606, steps per second: 126, episode reward: 90.000, mean reward: 0.149 [0.000, 10.000], mean action: 7.962 [1.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.751372, mean_absolute_error: 0.020516, acc: 0.990083, mean_q: 1.000000\n", " 17576/100000: episode: 24, duration: 4.960s, episode steps: 625, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.957 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.570146, mean_absolute_error: 0.016900, acc: 0.988782, mean_q: 1.000000\n", " 18637/100000: episode: 25, duration: 8.378s, episode steps: 1061, steps per second: 127, episode reward: 840.000, mean reward: 0.792 [0.000, 400.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.842 [0.000, 228.000], loss: 96.410495, mean_absolute_error: 0.090153, acc: 0.996226, mean_q: 1.000000\n", " 19465/100000: episode: 26, duration: 6.697s, episode steps: 828, steps per second: 124, episode reward: 240.000, mean reward: 0.290 [0.000, 50.000], mean action: 7.949 [0.000, 8.000], mean observation: 72.794 [0.000, 228.000], loss: 2.667546, mean_absolute_error: 0.036628, acc: 0.989117, mean_q: 1.000000\n", " 20634/100000: episode: 27, duration: 9.208s, episode steps: 1169, steps per second: 127, episode reward: 840.000, mean reward: 0.719 [0.000, 400.000], mean action: 7.964 [1.000, 8.000], mean observation: 72.786 [0.000, 228.000], loss: 87.500916, mean_absolute_error: 0.083615, acc: 0.991438, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 21581/100000: episode: 28, duration: 7.471s, episode steps: 947, steps per second: 127, episode reward: 440.000, mean reward: 0.465 [0.000, 200.000], mean action: 7.957 [0.000, 8.000], mean observation: 72.800 [0.000, 228.000], loss: 23.473624, mean_absolute_error: 0.056346, acc: 0.988372, mean_q: 1.000000\n", " 22426/100000: episode: 29, duration: 6.663s, episode steps: 845, steps per second: 127, episode reward: 840.000, mean reward: 0.994 [0.000, 400.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.795 [0.000, 228.000], loss: 121.085431, mean_absolute_error: 0.113332, acc: 0.994076, mean_q: 1.000000\n", " 23047/100000: episode: 30, duration: 4.883s, episode steps: 621, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000\n", " 23664/100000: episode: 31, duration: 4.853s, episode steps: 617, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.569514, mean_absolute_error: 0.014426, acc: 0.996753, mean_q: 1.000000\n", " 24948/100000: episode: 32, duration: 10.138s, episode steps: 1284, steps per second: 127, episode reward: 520.000, mean reward: 0.405 [0.000, 200.000], mean action: 7.983 [1.000, 8.000], mean observation: 72.676 [0.000, 228.000], loss: 17.623777, mean_absolute_error: 0.047725, acc: 0.994544, mean_q: 1.000000\n", " 25576/100000: episode: 33, duration: 4.944s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.971 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562681, mean_absolute_error: 0.015245, acc: 0.993620, mean_q: 1.000000\n", " 26208/100000: episode: 34, duration: 4.981s, episode steps: 632, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.983 [4.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.560683, mean_absolute_error: 0.015679, acc: 0.992076, mean_q: 1.000000\n", " 26819/100000: episode: 35, duration: 4.820s, episode steps: 611, steps per second: 127, episode reward: 100.000, mean reward: 0.164 [0.000, 10.000], mean action: 7.971 [0.000, 8.000], mean observation: 72.898 [0.000, 228.000], loss: 0.825394, mean_absolute_error: 0.021634, acc: 0.991803, mean_q: 1.000000\n", " 27757/100000: episode: 36, duration: 7.389s, episode steps: 938, steps per second: 127, episode reward: 440.000, mean reward: 0.469 [0.000, 200.000], mean action: 7.981 [2.000, 8.000], mean observation: 72.783 [0.000, 228.000], loss: 23.692752, mean_absolute_error: 0.054763, acc: 0.994664, mean_q: 1.000000\n", " 28599/100000: episode: 37, duration: 6.656s, episode steps: 842, steps per second: 127, episode reward: 440.000, mean reward: 0.523 [0.000, 200.000], mean action: 7.973 [1.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 26.398447, mean_absolute_error: 0.061280, acc: 0.992866, mean_q: 1.000000\n", " 29238/100000: episode: 38, duration: 5.097s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551429, mean_absolute_error: 0.014485, acc: 0.995298, mean_q: 1.000000\n", " 30236/100000: episode: 39, duration: 7.870s, episode steps: 998, steps per second: 127, episode reward: 840.000, mean reward: 0.842 [0.000, 400.000], mean action: 7.967 [0.000, 8.000], mean observation: 72.828 [0.000, 228.000], loss: 102.505608, mean_absolute_error: 0.096773, acc: 0.992979, mean_q: 1.000000\n", " 30870/100000: episode: 40, duration: 5.020s, episode steps: 634, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.989 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555784, mean_absolute_error: 0.014590, acc: 0.995261, mean_q: 1.000000\n", " 31498/100000: episode: 41, duration: 4.984s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 32133/100000: episode: 42, duration: 5.002s, episode steps: 635, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.986 [3.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554908, mean_absolute_error: 0.014569, acc: 0.995268, mean_q: 1.000000\n", " 32764/100000: episode: 43, duration: 4.970s, episode steps: 631, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.986 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000\n", " 33384/100000: episode: 44, duration: 4.898s, episode steps: 620, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.984 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569953, mean_absolute_error: 0.015428, acc: 0.993538, mean_q: 1.000000\n", " 34004/100000: episode: 45, duration: 4.894s, episode steps: 620, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.568353, mean_absolute_error: 0.014895, acc: 0.995153, mean_q: 1.000000\n", " 34638/100000: episode: 46, duration: 4.976s, episode steps: 634, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.970 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557348, mean_absolute_error: 0.015111, acc: 0.993681, mean_q: 1.000000\n", " 35257/100000: episode: 47, duration: 4.872s, episode steps: 619, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000\n", " 35885/100000: episode: 48, duration: 4.941s, episode steps: 628, steps per second: 127, episode reward: 110.000, mean reward: 0.175 [0.000, 10.000], mean action: 7.959 [1.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.884180, mean_absolute_error: 0.023372, acc: 0.990431, mean_q: 1.000000\n", " 36510/100000: episode: 49, duration: 4.929s, episode steps: 625, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 37137/100000: episode: 50, duration: 4.964s, episode steps: 627, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 37755/100000: episode: 51, duration: 4.867s, episode steps: 618, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.994 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.570196, mean_absolute_error: 0.014940, acc: 0.995138, mean_q: 1.000000\n", " 38625/100000: episode: 52, duration: 6.905s, episode steps: 870, steps per second: 126, episode reward: 840.000, mean reward: 0.966 [0.000, 400.000], mean action: 7.970 [0.000, 8.000], mean observation: 72.784 [0.000, 228.000], loss: 117.603101, mean_absolute_error: 0.110483, acc: 0.993096, mean_q: 1.000000\n", " 39260/100000: episode: 53, duration: 5.013s, episode steps: 635, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554908, mean_absolute_error: 0.014569, acc: 0.995268, mean_q: 1.000000\n", " 39899/100000: episode: 54, duration: 5.069s, episode steps: 639, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 40534/100000: episode: 55, duration: 5.144s, episode steps: 635, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000\n", " 41166/100000: episode: 56, duration: 5.067s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 41784/100000: episode: 57, duration: 4.906s, episode steps: 618, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.584799, mean_absolute_error: 0.014408, acc: 0.996759, mean_q: 1.000000\n", " 42415/100000: episode: 58, duration: 5.117s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.975 [3.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.563145, mean_absolute_error: 0.016226, acc: 0.990476, mean_q: 1.000000\n", " 43037/100000: episode: 59, duration: 4.936s, episode steps: 622, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 43655/100000: episode: 60, duration: 4.930s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 44268/100000: episode: 61, duration: 4.983s, episode steps: 613, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.571619, mean_absolute_error: 0.013974, acc: 0.998366, mean_q: 1.000000\n", " 44893/100000: episode: 62, duration: 4.937s, episode steps: 625, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563800, mean_absolute_error: 0.014785, acc: 0.995192, mean_q: 1.000000\n", " 45653/100000: episode: 63, duration: 6.066s, episode steps: 760, steps per second: 125, episode reward: 240.000, mean reward: 0.316 [0.000, 50.000], mean action: 7.962 [0.000, 8.000], mean observation: 72.820 [0.000, 228.000], loss: 2.901315, mean_absolute_error: 0.038071, acc: 0.993412, mean_q: 1.000000\n", " 46295/100000: episode: 64, duration: 5.093s, episode steps: 642, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000\n", " 46931/100000: episode: 65, duration: 5.019s, episode steps: 636, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 47553/100000: episode: 66, duration: 4.897s, episode steps: 622, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.986 [2.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.566523, mean_absolute_error: 0.014851, acc: 0.995169, mean_q: 1.000000\n", " 48180/100000: episode: 67, duration: 4.945s, episode steps: 627, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 48804/100000: episode: 68, duration: 4.913s, episode steps: 624, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [2.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.564705, mean_absolute_error: 0.014806, acc: 0.995185, mean_q: 1.000000\n", " 49433/100000: episode: 69, duration: 4.955s, episode steps: 629, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [4.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.560209, mean_absolute_error: 0.014697, acc: 0.995223, mean_q: 1.000000\n", " 50063/100000: episode: 70, duration: 5.026s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 50704/100000: episode: 71, duration: 5.087s, episode steps: 641, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 51449/100000: episode: 72, duration: 5.895s, episode steps: 745, steps per second: 126, episode reward: 240.000, mean reward: 0.322 [0.000, 50.000], mean action: 7.952 [0.000, 8.000], mean observation: 72.818 [0.000, 228.000], loss: 2.965130, mean_absolute_error: 0.040590, acc: 0.987903, mean_q: 1.000000\n", " 52075/100000: episode: 73, duration: 4.940s, episode steps: 626, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 52703/100000: episode: 74, duration: 4.941s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.975 [1.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.562681, mean_absolute_error: 0.015245, acc: 0.993620, mean_q: 1.000000\n", " 53332/100000: episode: 75, duration: 4.983s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 53969/100000: episode: 76, duration: 5.072s, episode steps: 637, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000\n", " 54597/100000: episode: 77, duration: 5.116s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 55228/100000: episode: 78, duration: 5.108s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.971 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560002, mean_absolute_error: 0.015178, acc: 0.993651, mean_q: 1.000000\n", " 55859/100000: episode: 79, duration: 5.018s, episode steps: 631, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.541145, mean_absolute_error: 0.013781, acc: 0.996825, mean_q: 1.000000\n", " 56487/100000: episode: 80, duration: 5.084s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.981 [1.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014368, acc: 0.996810, mean_q: 1.000000\n", " 57121/100000: episode: 81, duration: 5.067s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.984 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555784, mean_absolute_error: 0.014590, acc: 0.995261, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 57801/100000: episode: 82, duration: 5.387s, episode steps: 680, steps per second: 126, episode reward: 100.000, mean reward: 0.147 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.906 [0.000, 228.000], loss: 0.737148, mean_absolute_error: 0.018090, acc: 0.997054, mean_q: 1.000000\n", " 58429/100000: episode: 83, duration: 4.997s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 59050/100000: episode: 84, duration: 4.890s, episode steps: 621, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 59677/100000: episode: 85, duration: 4.933s, episode steps: 627, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [5.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000\n", " 60315/100000: episode: 86, duration: 5.016s, episode steps: 638, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 60924/100000: episode: 87, duration: 4.881s, episode steps: 609, steps per second: 125, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 7.989 [4.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.578635, mean_absolute_error: 0.015144, acc: 0.995066, mean_q: 1.000000\n", " 61549/100000: episode: 88, duration: 5.010s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 62177/100000: episode: 89, duration: 4.986s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.986 [1.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561102, mean_absolute_error: 0.014719, acc: 0.995215, mean_q: 1.000000\n", " 63110/100000: episode: 90, duration: 7.407s, episode steps: 933, steps per second: 126, episode reward: 440.000, mean reward: 0.472 [0.000, 200.000], mean action: 7.983 [1.000, 8.000], mean observation: 72.788 [0.000, 228.000], loss: 23.818796, mean_absolute_error: 0.054697, acc: 0.995708, mean_q: 1.000000\n", " 63732/100000: episode: 91, duration: 4.910s, episode steps: 622, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 64354/100000: episode: 92, duration: 5.029s, episode steps: 622, steps per second: 124, episode reward: 80.000, mean reward: 0.129 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.645283, mean_absolute_error: 0.016105, acc: 0.996779, mean_q: 1.000000\n", " 64972/100000: episode: 93, duration: 5.117s, episode steps: 618, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.568591, mean_absolute_error: 0.014405, acc: 0.996759, mean_q: 1.000000\n", " 65567/100000: episode: 94, duration: 4.784s, episode steps: 595, steps per second: 124, episode reward: 90.000, mean reward: 0.151 [0.000, 10.000], mean action: 7.976 [1.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.743619, mean_absolute_error: 0.018838, acc: 0.994949, mean_q: 1.000000\n", " 66193/100000: episode: 95, duration: 5.162s, episode steps: 626, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000\n", " 66981/100000: episode: 96, duration: 6.200s, episode steps: 788, steps per second: 127, episode reward: 240.000, mean reward: 0.305 [0.000, 50.000], mean action: 7.986 [1.000, 8.000], mean observation: 72.816 [0.000, 228.000], loss: 2.795577, mean_absolute_error: 0.035917, acc: 0.996188, mean_q: 1.000000\n", " 67628/100000: episode: 97, duration: 5.357s, episode steps: 647, steps per second: 121, episode reward: 70.000, mean reward: 0.108 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.543068, mean_absolute_error: 0.013808, acc: 0.996904, mean_q: 1.000000\n", " 68259/100000: episode: 98, duration: 4.961s, episode steps: 631, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.984 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000\n", " 68885/100000: episode: 99, duration: 4.916s, episode steps: 626, steps per second: 127, episode reward: 80.000, mean reward: 0.128 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.897 [0.000, 228.000], loss: 0.644322, mean_absolute_error: 0.017065, acc: 0.993600, mean_q: 1.000000\n", " 69511/100000: episode: 100, duration: 4.927s, episode steps: 626, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 70147/100000: episode: 101, duration: 5.125s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 70766/100000: episode: 102, duration: 5.038s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000\n", " 71392/100000: episode: 103, duration: 5.094s, episode steps: 626, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.979 [0.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000\n", " 72022/100000: episode: 104, duration: 5.135s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 72636/100000: episode: 105, duration: 4.888s, episode steps: 614, steps per second: 126, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 73349/100000: episode: 106, duration: 5.632s, episode steps: 713, steps per second: 127, episode reward: 90.000, mean reward: 0.126 [0.000, 10.000], mean action: 7.990 [3.000, 8.000], mean observation: 72.905 [0.000, 228.000], loss: 0.634291, mean_absolute_error: 0.016209, acc: 0.995787, mean_q: 1.000000\n", " 73982/100000: episode: 107, duration: 5.060s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.986 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556664, mean_absolute_error: 0.014611, acc: 0.995253, mean_q: 1.000000\n", " 74604/100000: episode: 108, duration: 4.916s, episode steps: 622, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 75216/100000: episode: 109, duration: 4.820s, episode steps: 612, steps per second: 127, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.574174, mean_absolute_error: 0.014535, acc: 0.996727, mean_q: 1.000000\n", " 75844/100000: episode: 110, duration: 4.956s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 76463/100000: episode: 111, duration: 4.880s, episode steps: 619, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.997 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.569273, mean_absolute_error: 0.014917, acc: 0.995146, mean_q: 1.000000\n", " 77085/100000: episode: 112, duration: 4.921s, episode steps: 622, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 77720/100000: episode: 113, duration: 5.000s, episode steps: 635, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 78350/100000: episode: 114, duration: 4.982s, episode steps: 630, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559318, mean_absolute_error: 0.014676, acc: 0.995231, mean_q: 1.000000\n", " 79292/100000: episode: 115, duration: 7.416s, episode steps: 942, steps per second: 127, episode reward: 840.000, mean reward: 0.892 [0.000, 400.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.790 [0.000, 228.000], loss: 108.600573, mean_absolute_error: 0.100712, acc: 0.997875, mean_q: 1.000000\n", " 79912/100000: episode: 116, duration: 4.890s, episode steps: 620, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000\n", " 80542/100000: episode: 117, duration: 4.974s, episode steps: 630, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 81149/100000: episode: 118, duration: 4.792s, episode steps: 607, steps per second: 127, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.578911, mean_absolute_error: 0.014646, acc: 0.996700, mean_q: 1.000000\n", " 82026/100000: episode: 119, duration: 6.922s, episode steps: 877, steps per second: 127, episode reward: 440.000, mean reward: 0.502 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.796 [0.000, 228.000], loss: 25.339196, mean_absolute_error: 0.057369, acc: 0.997717, mean_q: 1.000000\n", " 82659/100000: episode: 120, duration: 4.986s, episode steps: 633, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 83288/100000: episode: 121, duration: 4.949s, episode steps: 629, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 83917/100000: episode: 122, duration: 4.984s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 84546/100000: episode: 123, duration: 4.971s, episode steps: 629, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 85170/100000: episode: 124, duration: 4.939s, episode steps: 624, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000\n", " 85796/100000: episode: 125, duration: 4.954s, episode steps: 626, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 86416/100000: episode: 126, duration: 4.905s, episode steps: 620, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 87038/100000: episode: 127, duration: 4.916s, episode steps: 622, steps per second: 127, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 87661/100000: episode: 128, duration: 4.946s, episode steps: 623, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000\n", " 88289/100000: episode: 129, duration: 4.945s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [2.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.561102, mean_absolute_error: 0.014719, acc: 0.995215, mean_q: 1.000000\n", " 88920/100000: episode: 130, duration: 4.979s, episode steps: 631, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 89547/100000: episode: 131, duration: 4.935s, episode steps: 627, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 90683/100000: episode: 132, duration: 8.939s, episode steps: 1136, steps per second: 127, episode reward: 440.000, mean reward: 0.387 [0.000, 200.000], mean action: 7.995 [4.000, 8.000], mean observation: 72.763 [0.000, 228.000], loss: 19.557831, mean_absolute_error: 0.044822, acc: 0.997357, mean_q: 1.000000\n", " 91299/100000: episode: 133, duration: 4.879s, episode steps: 616, steps per second: 126, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 91913/100000: episode: 134, duration: 4.856s, episode steps: 614, steps per second: 126, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 92540/100000: episode: 135, duration: 4.927s, episode steps: 627, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 93168/100000: episode: 136, duration: 4.948s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 93796/100000: episode: 137, duration: 4.945s, episode steps: 628, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 94433/100000: episode: 138, duration: 5.023s, episode steps: 637, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.536040, mean_absolute_error: 0.013662, acc: 0.996855, mean_q: 1.000000\n", " 95050/100000: episode: 139, duration: 5.681s, episode steps: 617, steps per second: 109, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.569514, mean_absolute_error: 0.014426, acc: 0.996753, mean_q: 1.000000\n", " 95686/100000: episode: 140, duration: 5.984s, episode steps: 636, steps per second: 106, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000\n", " 96327/100000: episode: 141, duration: 5.046s, episode steps: 641, steps per second: 127, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 96953/100000: episode: 142, duration: 4.919s, episode steps: 626, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000\n", " 97576/100000: episode: 143, duration: 4.899s, episode steps: 623, steps per second: 127, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 98207/100000: episode: 144, duration: 4.985s, episode steps: 631, steps per second: 127, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 98841/100000: episode: 145, duration: 4.987s, episode steps: 634, steps per second: 127, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 99484/100000: episode: 146, duration: 5.079s, episode steps: 643, steps per second: 127, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000\n", "done, took 829.107 seconds\n", "Testing for 10 episodes ...\n", "Episode 1: reward: 70.000, steps: 621\n", "Episode 2: reward: 70.000, steps: 629\n", "Episode 3: reward: 70.000, steps: 618\n", "Episode 4: reward: 70.000, steps: 628\n", "Episode 5: reward: 70.000, steps: 629\n", "Episode 6: reward: 70.000, steps: 633\n", "Episode 7: reward: 70.000, steps: 614\n", "Episode 8: reward: 70.000, steps: 620\n", "Episode 9: reward: 70.000, steps: 641\n", "Episode 10: reward: 70.000, steps: 627\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sarsa = SARSAAgent(model, nb_actions, \n", " policy=policy, test_policy=None, \n", " gamma=0.99, nb_steps_warmup=10, \n", " train_interval=1)\n", "sarsa.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])\n", "sarsa.fit(env, nb_steps=100000, visualize=True, verbose=2)\n", "sarsa.test(env, nb_episodes=10, visualize=True)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzsnXeUFEXXxp8m55wzrAgiOYmYUERRQZIoiAkVMGBEfc2Y0yu+YABF/ARFASWoJEFBggrIkrOEJeeFXVgWNt7vj0uf6VDd0z3TM9Mz1O+cObvTXV1TU91zu/rWc28pRASJRCKRxD8FYt0AiUQikXiDNOgSiUSSIEiDLpFIJAmCNOgSiUSSIEiDLpFIJAmCNOgSiUSSIAQ16Iqi/J+iKEcVRdmo2VZBUZTfFEXZfv5v+cg2UyKRSCTBcDJCHw+gq2Hb8wAWEFFDAAvOv5dIJBJJDFGcBBYpilIPwCwianr+/TYAnYjokKIo1QEsIqJGkWyoRCKRSOwpFOJxVYno0Pn/DwOoalVQUZTBAAYDQAVFaVO/QgWgXj1zQSJg9WqgRg2genXetmcPkJEBXHqpufyuXcDZs/p9x44Be/cCLVoAhQrx8Wlp/B4AsrJ4W40aQKlSgeN27wZOnQKaNw/+zTMygG3bgIsvBkqXDl4+GAcPApmZwEUXBbbl5gLr1vH/bdo4q2f9eqBsWaBu3cC2tWuBihWBChWArVv5M8qWFR9/5gyXadgQKFNGv+/oUWDfPv6/Zk2gWjV9+w8dct7OSGHVjnXrgHLl9P0iiT82bwaKFOFrODMT2LIFSEric3sBsGrVquNEVDloQSIK+gJQD8BGzfs0w/6TTuppU7gw0f33k5CsLCKA6O23xfuN3HEHUaNG9mUefpiocuXgdQ0cSFS7tnjf9u1E115LtHQpv1+yhNv522/O2hmMe+8lqltXv+34cf4MwHk91asTDRqk31a6NNFTTxH98w/XNXOm9fF//sll5s0z7/v0U943fjzRjh36fa++6q6dkWL4cHE7qlUjGjw46s2ReEzz5kQ9evD/a9fyuZ4+PbZtiiIAksmBjQ11hH5EUZTqFHC5HHV8ZAELt33BgsDw4cBVVzmr57bbgCuusC9zyy3ipwEjPXoATZuK950+DfzxB5Cayu/r1uV2NmjgrJ3BELm8ihd3X8+wYTy61pKfz/1dsybw2mvm/caygPj8tG/P37lPH/2TDQB06gQoivv2Rothw4BG0hsY91StClSqxP9Xq8bXY+PGsW2TH3Fi9WEeof8XwPPn/38ewAdO6mlTv35gpOuETz4hevZZ5+XnzSO67z6ijAzx/o0bierXJ5o/33mdyck8Gvj5Z+fHuOGee4jq1TNvf/RRoooVw6v7/vuJvvvOWdnFi/l7/v67eH9uLtFPPxH9+294bYoUkycT9eoV61ZIJBEBDkfoTmSLkwAsA9BIUZT9iqI8AOA9AF0URdkO4Prz74NToQJw5ZVWdxb2g54+Hdi2dCkwc6a4fFoacOSIftumTcD48eyDBoD0dParqmRnAykp7IMz1nX4sHW7gMAoNDsbOHAAOHdOXN4L8vN5RNK5s/Njjhzh76Hlq6+AO+8EcnK4b8+etT6+UiXg9tv5c42cOcP91rMnMH26ft/p01x3rLnjDnPbAHG/SOKbnBz+DRp/x5LgBp2I+hNRdSIqTES1iOgrIkolos5E1JCIrieiE44+LTOTJyBF5Oaya+DjjwPb8vOtH+cffxzo0EG/TXUbqMe8+ipwySXaLyOua9gwoF078T6jQV+zBqhVC1i4UFzeLRdfbP7s9HRue8eOzutp0wZ45hnxvm3buG9nz7Y+vkkTYMoUsevpu++s3TUjRnDdfqV1a+DZZ2PdCkm4DB4MvHd+3LhjB/8Gf/kltm3yIdGNFP33XzYATiGy9rkXKGA20Op79RhRGcCdz7d4cVbJqOoQ9Viv8si/9BLwww/e1GWkTBngjTcC/aHe8NxidQ78xHvvAZWDiwAkccrixazaAgLXo1zLwUT0f6nBjKn2JNmN0AsUMBso4whdUZwbMauLo2lTvpDUyVqvDbodTz4Z3vEZGfzk48Sg//EH37SWLTPv054Dq/MR6x9XZmZg4lqSeGgHd+o1GOoAJYGJrkG3G3GLDEXZsoGZbVF54wktVgwoX15/4rWGplw5oFcvvY7a6rOt8Hp08NxzQJcuobfHClX4WKCAM4Oek8NafLdPNH5RuMT6hiKJLCKDLs+5ieiP0N08vn/9NY8creoxntDHHwdOnAjI/oxlGjTgibP27Z23Yd06Lr9iBb/3enRw6BAHSXmN1v2k9nlennV549ONFvX4CROA/v29a6PX+OXmIvEe7dO6dLlYEqoOPXTsfOIffhhcW67St2/w6MRu3XjyJBi33249KXr6NLByZUB9U6sWt1MUvRoKXunQX3lFr7nX6sorVeI22/WXcf5BS7t2fHyPHuZI0xtuMGvT/cSrrwL168e6FZJwSUoKTL5XrMjXY+vWsW2TH3GibfTq1SYpiaO8nPLmmxzp6JSpU4n69GHNtIg1a4gqVSL69VfndQbTZ4fLgAFESUnm7Q89RFSlSuj15uRwHbNnOys/axZ/zxUrxPuzs4m+/ZZo06bQ2xRJpk7lqFuJJAGBVzp0TylXLpBXxXxnYXnd8eOBbcuXA0uWiMsfOQJs367ftnkzMG1a4P3x41ynSm4ub8vJ0R93+DBLoazaBQQe97KyuM5Tp8TlvSAvjzX7N97o/Jhdu/Sa+0KFgDFjgJtv5u+7bZu9HrtWLeD++8VKkVOnOFfM3XcDc+bo96Wmct2xfvzt04djEIzs3GkdYyCJT5xczxco0TXop05ZG878fA7lHTNGv83KL/rGG8Dll5vrAALH/O9/eteIldF59VXg6qvF+4yuiO3buZ3z5onLu6VlSw6f13LmDPDOO7zPKZ06sQRShYj7g4iTazVuDPz4o/XxLVpwIJLIPTFzJtC2rfi40aO5br8qDoz9IolPevcO6NAPH+ZrburU2LbJh0TXoG/fzhOdTglVh66dPAlXh16mDEe3qr5jrydknnkGGDfOm7q0ZGRwfpwRIy4MHfrw4UDJkrFuhSRSrFgRGAzKSVFL/KdycapDF8kWI6FDb92aUxC0aqWvOxoj0mHD3JU39h3ARt2JQf/pJy6nBm9oiQcdem4up2UQEeu2ScJH6tAd4R+DLjIU1atbq1SMGnPAnA/cWGfFiuwHNoaquxmxe62BHTrUnGEyFPmd8RitysWJQVfdM6LzEw86dCv83j6JM9TMoYDUodvgH4MuYvx4az+ZyJ0ybJg+V4zx0axBA+Cbb9z5pv/+m/PBrF4trjNcTpwwJxnzglAMOmCvQ//2W+Cuu7xro0TiFKlDd4R/dOiKAnzxhfOVb26/PfgKQ926BVY/smPAAOsskKdP80o+anbFqlW5nVa6dbeILsqiRd3X8+67vBKTitaglyzJbbZL9mWnQ2/Tho+/+WZW32i5+WagShX/+tnffdffycMkzmjVKhBnUaYMX49OY1YuJJxoG716tWnYkFcAcspTTxE98YTz8l99RdStm/X+lSuJihYlmjPHeZ1z57I+e9ky58e4oV8/oosvNm9/8EGiGjVCr/fUKaJnniH66y9n5SdP5u+5ebN4f1YW0ejR7uIIoskvvxA99lisWyGRRAT4Uodepox+7Uwjq1bpc2uvXs3pakXs38/ltWzbBvz2W+D94cNAcnJgtJqfzzpy46h4z57AGp5GjK6IrCyuU6uX95q8PNbs33KL82M2bNC7m0qXBv77Xx6V5+Vxm7U6dSMNGwJPPGEegQOs9126FHjkEXPa4EOH+DzEeoKqe3d96mUVY79I4p/cXL6eI+GqjHOia9DT0jiFrhVt2wJjxwbeE1lPan3yifmRy1j+66/ZNaKqH6x8bu++ax3EY5RCHjzIdc6aZf093HD55ey20HLuHIc22y0ZZ6R7d15mTiU/n/OqZ2fzTahdO2DiROvjW7cGRo4UL3CxaBFw/fXi477+ms+buqhIrLBSuXTrBrz+evTbI/GWq64CPviA/z99mq/nSZNi2yYfEl2DvnMny+OcEooOXVveavLEjfKhcmXgppsCq4t7PSHz+OMcACUinFWR9u/nNk+c6GxSNDeXU9CKyvjVP67llVf4qUSSmKxfH3h6l5Oilvhb5RJKPnSRZtqJO8Dq4mjfnsPdL77YfZ3h8uqr7sqLdOhOVS7ff8+Tpykp5n3xoEO3w89tkzhDO1gLN1AugfHfAhdaLrrI2ucuChqqWpWXUlMx3smrVGE/cJ06obfJ69HBwIFmxUykdOh26XONriUtdjdhv+i8rc6HX9onCQ+RDl0adBP+HqGPH8/yJKt6jD/i554D/vkn8N544uvXBz77zF3q219/5eCmTZvEdYZLZiaH6XuN2xG6nWxR/c4TJwL33edZEz1HGu/ERerQHeEfHTrAixE3a+asnttu0y8ALeLmm1mbreYXz8/nUao2HB7gLIPGVYNUMjN5hXF10q98eXZPRFKHXqQIr77kxoc+apQ+U6LWoCtK8L61Cyxq2ZK/c5cugbkElVtvZX1woehfSo4YNUo80SuJLzp1CogEihTh69Eqc+uFjBNto1evNo0aER044Fx8ed99RI8/7rz8iBFEXbta7//7b9Zau8mHPnUqH7N+vfNj3NC3L9Ell5i3DxxIVLt26PUeO0b0+uvO2z1uHH/PvXvF+zMzid5/nyg5OfQ2RZK5c4leeinWrZBIIgJ8qUMvVUofzWhk0SL9pNzGjeac5yp79vDydFo3ws6dvLqQyv79XMaY/9zIv/8Cf/0l3mccuWZnc50HDtjXGQ75+dxXPXo4P2b5cmDLlsD7SpV4UlUdlS9ezP1jRcuWwMsvc6yAkRMneGL4P/8x99PevXze7Pzz0aBrV+Ctt8zbjf0iiX/y8/k3KOMLTETXoKuLIVhx3XX69Lp2ssWJE7m81pAYdegzZnCZ9PTAfhGjRgE9e4r3GX3LaWlcpxv5pR3XXceLM2jJzmadvZuQ9QEDOIe6to79+4GzZ/l9ly6c79yKNm2AN980LzEHcODQbbeJj5s0Cbj2WutMh9EiM5NvPEbuvJPjDCTxTcOGwEcf8f/5+fy7sYuruECJrkHfvZtHc04Jlj5XLaMSCR16rVq8fqk6cvV6Quahh9iQikhNDb3eLVuA2rV5UhcQyzy1ZGZy5F286tBfe83Z+rGS+GTnTuDkSf5fqlws8Z/KRWso7SJFRYY1Ejr0jh2BH34IGItoXkwffhj6sdpJUfWvnVtk/HigWjXg2DHzPqlDl8QakQ5dnlcT/jLoRmPRqpW1xFBkWOvX12drNJ74mjXZD6xmbbP6XDu8vpj69jWrT7yQ3xkNesGCocsW40GHboXf2ycJjjFGQo7QLfF3YNH//Z+1/1NkWJ9/Hpg92/xZ6omvW5fXJVSjPp3www/sV1YnFL2+mFhb4k1dWkQjdCf50O106N9/zxJPiSSaiAYbogVuJD7Tof/8s302Ri29e7MO3S53eNeuvMCxmkEwJ4cT+5QuDRQuHCg3ZAgncRKRlcWLW6sXT8mSwC+/uAtOskN0URYuzHp31WfohLFj+RgVo4GePNkcIavFTofetCl/58svN6/bqZ6HIkWctzWaGPtFEn8QseKrUaPAtl9+cZe87kLBibbRq1ebJk2IUlOdiy+7dXOnQ3/5ZaIuXaz3L13K4+HffnNe54QJfMzOnc6PcUPv3kRNm5q333MPUb16odd78CDRRx8R7drlrPz//sff8+RJ8f6MDKJXXiFavjz0NkWSBQtYJy+RJCDwpQ69eHFxvm2VWbN4dSCVnTv1+dG17NrFd2mtXG7fPr0scu9eLpOZad+ujRsDahAjxpFrbi7Xqa5AHgny87mvevVyfszChYFl8gBeqempp3heAQDmzeOMdVZceSXw/vuBqFotJ07wCP/NNzkPtZYdO/gpKNbpc6+7jlM/GFmwQN8vksRg9mxg8+ZYt8J3RNegHz1qnw+9Z0+9ttROhz5zJj+GafOgGMsvWMBlRMoNLWPHsl5ZhNF/l5XFdU6fbl+nU7p144WrteTlcQ6bihWd1zNkCDBiROD9mTMsXTxzht/fcw8wZoz18W3bskEUubC2bgUefFB83IwZHP6fleW8rZHg5Em+gRsZMiSgX5bEJ+fOcWK9Tz4JbOvTB5gwIXZt8inRNej79plXGbIjWPpcwDvZohUNG/JEYKlS1p8bDgMHikeWgNhAOSU5mTNPqsnKgk2KnjzJo+141aG//bY+06YkccjP50GZGiQHiJPzSXwmWwSc69CtAou05b0wvldeyRGW6mjZa5XLuXP6C1X7GdrVm5xgvLkBznXoY8fyzUuUEEzq0CWxRJTaWZQ+W+IzlYvRWFxzjfVMtshYN23KemtjfVrZ4ltvAUlJ7toc7HPDoX9/ng+wWtPUKXb50IHE16FbnQ+/tE8SOiJJrRyhCwnLoCuK8hSABwEQgA0ABhKRfc5XNz+wL78MXo/WSD3/vL6M0fjWrg289JLzzwd45PrQQ5wXpUaN6AQ1eGGE1NG4Wx266LPVbZMns0zRr0jjnZjIEbpjQjboiqLUBPA4gCZEdFZRlB8A9AMw3vZAu9He77+z0XXCrbey/tlOY3z99ZyVTU1yde4cT8xWqcL5xlUeewy4/XZxHfn5eldOoUJcp6oeCRfRKKNAAb55HD3qvJ5vv9WvqSnSoRtzmYvaITKKjRrxd27aVK/fB4A77uAJVW1/+omJE+Vao/FOoULAXXfp50hmz2Yll0RHuC6XQgCKK4qSA6AEAAuN4XmaNgVuvNF6/zXX6N936MCvkSPNZWvWNGcjfPRRTrc7fz6/r1pVv7jBP//wZyxYwDI3lYYNrV07RleEonCyfS8xGlH1M7SrLwXjssv07y+9lJ9wVPdShw72x9u5XEqX5pQKr77KWRevuCKwr3Zt5zfhSNKzpzgozdgvkvijRAkesGi56qrYtMXnhDwpSkQHAHwIYC+AQwDSiWi+sZyiKIMVRUlWFCX52KlT5khDLZMnA2vXBt4fOsTpakXs3Mmr8KiyPICzBWrzlO/Zw2XU9LlWrFoF/PijeJ/IFfH99/aa7nAh4shLY1pdO2bN0ucqr12bpYZVqvD7mTOBP/+0Pv6mm4DRo8UG/eRJvjmMHGn+3lu3cn8Eyzkfaa66Cnj4YfN2Y79IEoNp0/RrH0gAhGHQFUUpD6AHgPoAagAoqSjKXcZyRDSWiNoSUdvKeXn2iyzcey8wZYr2YGsXzeLF/BimTTFrLL9iBZfZvz+wX8S33wKDBon3iUauAwbwBeUFt99u/mwizn5YooTzeoYNAz79NPD+5Eke4atBVc8+q99vpE0bNoii/t67l+sXMWcO94dRqRNtDh0SB5oY+0USfxw/zgFvn38e2DZkCP9GJDrCkS1eDyCFiI4RUQ6A6QA62h5x4ID9AhdG3OZDN5b3Ih96ixbAE0/oIyi9TAx0553sKhIRzko7ixezu0EN5Ao2KXrokP7pSEs86ND/+9/gbiVJfJKfz/Nf2utXqlyEhPNL3Qugg6IoJRRFUQB0BhDcArkxDnYjdJGxNpb3Ih/6VVexq0HrKvJyhv3kSfNCFlpViRuC6dDt2jx6NNC6tXhfvOvQJfGNyO0pVS5CQp4UJaIViqJMBbAaQC6ANQCCR8K4Mejdu1sbGbUe7Unt0EGvMTca/aQkXm7OOHlmN2LPyeF8MSVKBMp5OTq49152CYWbbySYDj2YQXdy83TyubFC3lASF5HbU47QhYSlciGi4QCGuzrIjUHX+syMONGhG8vUqgU8/rjzzwc4f8SwYTw5q663GenRQax06FbnRm3PpElAv37hty1S+OXmIvEWUWCRHKELiX6kqN2PbtkyvczQjptu4nwldpK5Tp24jJpH+cwZXte0Xj29C+Xppzl5lQiRPnv5cuftDIbVKOOii1i145Rp0/R+fvViVyNnv//ePne83XxF/frcjyLt/Z13Aldfba9eiiXTprmbXJb4jxIleML+kksC2379Vbyg+QVOdA16ixb2+lGje+WiizizoTaLoErFiuZshP3784z4b7/x+3Ll9EvS/fMP688XLdJr3u201CKDbuUGChWRIW3TxnqSUoRxwY2OHdkHrwZfBEtcZedyKVaM3VXPPMOZIbV9Z9T6x4p+/fTnWqVp0+i3ReIt5cvzHI+W5s1j0xafE135QqFC9ivbfPUVj35V0tLEyaIAlj9+/jnn6taW12rO9+7lMsEiLpct4+XuRIj8d199xcdEkkKFOIjHKT/8ELiRAZy35o47gDJl+P2MGfrl+YzccQd/LxHp6ZyC9quvzCqlDRs41W+s0+dedhlLVI1MmcIRyJL4hYjz7WufZr/7jqOXJTqia9APHGCXhxVDh7LhUbEbNa5ezY9hhw5Zl9+0icukpAT2i5g6laWJIkQz7E884Z0OfeBAsV//u+/0icaC8dpr+tw3Bw9yRKyqD3//feDjj62Pb93aOif88eO8uIWIhQs5102wRUQiTUqKONDE2C+S+GP3bk45oc1//vLLUocuILoG/fDhQJCPE8LVoVvJFt1MnnXsyBePNoeJlxMyvXuz0kWEm9zxRubN41w26tNJsGyLO3cCf/8t3hcPk40jRwI33BDrVkgigdUi0XJS1IS/0ucC9rpyUT1OdOhO5E12OnSj39/LwKKDB1mRIvLh27lIguFWtvjJJ8DXX4vTJMS7Dt3PbZMER/SULGWLQvxl0I3G4u67raP/RDr0Ll30vlyj0W/cmP3AF19s/7laMjJYHVOlSmR06IMG8Sja67wUbmWLTnXoxmyLfhm9y3zoiYuVDl2O0E34y6Ab0a4haETkTjEu5WYsU6MGLyfnhv/9j7MM5uTwRKVar98vJqNs0YkOPZh7a9w44L77PGui50jjnZhIHbpj/KVDX7fOPme3ls6dWXFRp451mcsv5zKqOyM9nfOjNGkSUH8AwH/+w8l+RIhkiytX6o8PB6uRZbNm7nTos2frdebGH8E339j3vd0IvXp17keRPPHuu4GuXf2rCTb2iyT+KF+eB2taHfq8efK8CoiuQW/dWqwVVjHmJC9dmlUqH3xgLluqlNl1cv31PCKdN4/flyypL5OczGWWLNH7xStX5pcIkUEX5d0OB5GhvfhidyMQY9DPLbdwAJVqaIPlLLcboRcuzJr/wYP5xqfNJV++vP0iI9Hi/vv5Jm/Eq4VIJLGjShVWaWmpVy8mTfE70VW5KIq9y2XUKM4SqJKba102JQX48EOeVFTJytLn5d6/n8vs2WPfrsWL+bNFiAz66NF6zXckcKtDHz8e+OWXwPu6dYGbbw6MYqZN49V7rBgyRC8L05KRAbzxBmvdVQmoypo1HPgV6/S5LVtyEJoRY79I4o+8PE5il50d2DZ+PPDzzzFrkl+JrkHft09vgI28+CIvSKBi5wbYto1zfO/da10+JYXL7NgR2C9i1iz+bBGiGfbXXwemT7f+Hm4YOpRdPkamTXO3aMSHH7JbRWXHDr7g1TrGjbPXobdowTcAERkZ1sf+9RdHkGoXGokFW7aIA02M/SKJPzZtAipU4EVaVEaMkOdVQHRdLkePcqrYGjWclfeDDv2GG9i9YzzeqwkZKyOam2u/wlAwfvmFk4qlp7PLJJgOff164NgxsdsiHiYbR4/mfDXGVMSS+EcuEu0Y/6lc/KZDv/JKfmnxUoe+Ywc/UqoJxLQsWeKurnDyoX/8MSc8EgV+xbsOXRLfyPS5jvGXQTcai8cf50hNu3q0Rqp3b33GQaPRb9aM/cCNG9t/rpbUVM4RY8yz7tXFNHQo16/NYRMK4eZDt3sa0p4zY+ZCv4ze5Y87cbEKLJIjdBP+MuhGRFkWVUTulGeftS9TtSrQt6/zzwfYBztihH5CJh4e97xc4ELtx1GjZD50SfSRof+O8ZdB375dn1f7zBnOzmiMTgSAK67gCVZtCt2cHD7RagBQ69ZcpkIFfp+aykm92rXT691ffhl46ilxm0SGbtUqTinrBVYjy8suc6dDX7gw8L0Bs0EfNy70EXr58tyPIu39fffxk5Hax35j4ULx9SOJH6pXZ5WV1i05b5675HUXCNFVubRpY9aaa6lePWA0iHgy8p13xGWLFuXy2nS8HToAPXsG3hcpwmVU6d6qVTzJaVwdvkyZQN5wIyJDV7Wqt4E0IkOq7QsnVK6s14PfdRfLMVUjX6ECUKmS9fHB5isKFOCnG6Ncs2RJbmusf1yPPMLuNCNVqvhDJy8JnRo1gFde0ceUVKokz6sAfy3n/u67gaAg0cy2lj17gOHDOUugCpG+/KFDXGbrVvvP/f134K23xPuMdQKc2c8r2aIVhQq5cw+NGcO5v1Xq1OGVhFQjPXWqtdYeYOmklU49K4sj9ebO5RTIWlau5BS1GRnO2xoJmjTRBzypGPtFEn9kZbE8Wbs2wrhxUrYoILoGffduzq1thRuDvm8fP4ZpA12Mo8zDh7mMuiiDlXsjmEE3jlw//hj46Sfr7+GG557jm46RmTPdGUmj4Vq3jnOqq0yfDnz6qfXxl15qPQGdnW3940lOZl1+rHXoa9fqYxhUpEGPf1at4kA5bdDh//2ffaDcBUp0DXpqqvOFEEQJebSIVC6R0KH37s0JuozHezUh07kz50IxkpUVXjTqtGmcZ0UlmDLn77+BOXPE++JhsnHsWPeJ1yTxgZwUdYy/JkW1BBuhi4y1cTQt0qoH+zwjHTuaR65eyhbXr+fv0LKled/q1e7q0rYpL89dutFRo3hULwp0Emn77T7bb/i5bZLgyHzojvGvQS9QgNUnV1xhX4/2pN5/v171YjT6bdqwH9iNDn3/fs4j0ayZvrxXo4Nhw/ip5a+/wqtHpEPXTlR6IVsEzJPBfh+9+719kuDISFHH+NegFypkvY4lIB6hP/mkuIx6QVSqJHZv2PHBB8C337JR19br99FBfr67EbqTNAvvvKNXEUkk0cDK5eL332AMiK5BL1jQ3qAfOBDQDOfnc+6X0qX12nSVNm2A06f1evC0NL4RqLlXLrlEX+bIER4Jd+qk102/9pp1ci7RyHX1au9kelYXZadOgfUfFsmNAAAgAElEQVRAnfD33/p2Gg366NGhj9CLFuV+1EpEVe6/HxgwQHyO/ICxXyTxR1ISz2Np01bPnSufvgRE90pv2ZJ1wVaULh0wvmfOsL55zBhx2YIF2XBrg2kuuwx48EHrMmvWAH36AP/+q6+raFH+bBGikWvJkt4FFgHiC7N0aXcJ/EuW1Kc9ePJJvRunWDFz2L6WYCP09HTOJa+qkFSKFOG2xtpoPvOMWOVi7BdJ/FGrFl/P2pz+JUrI8yrAX0OXl14KyAGDTYoeOAA8/TSwYUNgm1EzfvQolwk2uThnjjltgLZOo7H67385H3MkcZsPfcQIlnKp1Kypn2idNo2X0rPigw/0Mkct+fkcuLN0qfmpYdkyngc4dcp5WyNBgwZA+/bm7cZ+kcQfGRkcDKhVyH3xhf0SlRco0TXoO3faa6s/+wxYtIj/D2bQjx/nxzA117l6jNb4pqVxGTWwyMq98ddfHCwkQjRy/eYbfW7mcHj9ddbfG5k3T++3D8bEifqE/3/+CXz9deD9woXWTzsAR/BqJ36NWC0SsX498NFHsdehL1sm1psb+0USf/z9N8dJrFkT2DZtGjBpUuza5FOia9DT0uxXIdIimgjR4kSHLioDuPO9DRxoDsjxcob9iiv0y+GpZGaGZ4gmT2Y3hEqwSdF586yDpeLBVzlhAvDEE7FuhSQSSB26Y/ynclFPnkh7qsWJDt0qsMgNl13GLy1eamCXL+e6Lr/cvG/79tDrdStb/PhjdqeIVCwyH7okloiCDKUOXYi/DLrWWBQvzgvDGheXMNajPanDhuknToxlOnRgV0STJs7b+++/7PrQGnUvRwcvvshPLW4XswiGl7JFLVqdP+Cf0bv8cScuosGdHKEL8ZdBBwI/zOLFOc+JFaLR96OP2pcpX946UMnKILz/PrsjtCv5+FUDa1yxyI1Bt5Mtqse/8AJw003BPztWyKeHxMTK5SIxEV2DXriwvdHQTgLm5nLirSpVxKlqGzc2/1D37+cbgTqKrFtXX2b/fp4cvPlmfSrZt9/mlwirfOheXVBWxuamm3iNT6doJ4wAs0FXF+qwwljeSG6u+DsPHswvv2LsF0n80bw5Z1esVy+wbdYsadQFRHdStHlz5/rt48c5//H334v3i05mhw76Ub2xzLp1wL336jM0BkPkivD6Qgo2TxAK77zDMkOndQVzuezbB7RqZZ3AK9a88gqwYEGsWyGJBHXqAA88oB+ESWMuJCyDrihKOUVRpiqKslVRlC2Koghm9lzw1FOszgCCyxaPHQMGDWK5mopxNH3yJJcJ5p/++WdgyBDxPtEI/d137XOLe0GRIpzp0Slvv61X41SpAtSvH3g/Y4b1dwRYq22lQwf4B7V2rVlK+eefPEJ3I7GMBLVqiedGjP0iiT9OnABWrNDr0MeMYcmvREe4I/RRAH4losYAWgDYYlt6+3Z7P+6ECaw5BYIb9NOn+TFMqwQxjjLPnuUywXToyclcToRo5Przzxx67AUjRpjT8wLsGnIT+j99uj6Kc9YsTimrsmoV8NVX1sfXqaN/pDXy++/i7Vu3Al9+GXsd+sKFet29irFfJPHHokX89K39rf/2G2vRJTpCNuiKopQFcDWArwCAiLKJKM32oFOnnD8qhaJDN0aKepEP/ckngc8/Nx/v1Qx769b8MnL6dHgLM0yezNGfKsEmRX/8kV/B8Ouj7qRJnJ1TkniIbEGw6/kCJZxJ0foAjgH4WlGUFgBWAXiCiHRDNUVRBgMYDABteIN9rV7q0L3Ih96mjXmblxrYBQv4u4iWTzt40F1dwVQuROIl9QB2S6jrhoaCn5Ukfm6bJDgiHbqULQoJx+VSCEBrAGOIqBWAMwCeNxYiorFE1JaI2gatUWtoypfnDIFu8qG/9RZw++3m+tQTf/XVPDFq9LXa3WTWrNEvfaWW9+pieuMN+zTBThHlQ3dzc3OqQ69a1f5z/Ybf2ycJjlzgwjHhjND3A9hPRCvOv58KgUF3RalSgRStpUoBDz9sXVbNpKiNhhw0SF+mQAF9tsUyZVhpY6Ro0UDKXSPvv89GXV2XFOAMfmqaX79QooReQWQ06MWKcVZE40pGKsF06DVq8JJ2nTvrtxcubN13fsDYL5L4Q+RykdkWhSgUxl1OUZSlAB4kom2KorwGoCQRWaQtBNqWKEHJTtcUzcoCNm3iiTpt7nI7Nm/mkX316uL9KSksu+vb1z6Nr5Y77uAEVFvs53tD5uqr+ab0xx/67TfeyHMOWhWPG267jScsN250Vr5jRzbM8+eH9nmxZtAgPrcHDsS6JRKv2buXU2R07cqDsgsQRVFWOfFyhKtyeQzAd4qirAfQEsA7tqXdhNzv38/+a1GOays6dgTee896/4YNwNCh+qjPYDh1RYRDJOofNy6QudIJwb7nrl28wIBXWSa95t13Q7/5SfxNnTrsSr1AjbkbwjLoRLT2vH+8ORH1JKLwxMhDhgRyVwdTuaSnA/366UeURrdBZiaXmT3b/nOnTgX69xfvE7ki3nor8hrYokWBXr2cl3/5ZXYPqZQrpw/EmDWL++LcOfHxv/zCqWatuPdeTn9slCcuWsR95yaqNRJUqsQ/fCMvvaTvF0n8cegQy2bPng1sGz1aZtcUEN1IUeNKQUamT2e9NBBch56VxbI+Ox16bi6X0fq/RWzcGAhoMiIauf7xh7Uu2y1ffMEXp5GlS925D+bP10/eTpyoz3++bRv3RU6O+PgqVYDKla3r//NP8faUFO47p660SDFnjjiAaP587xOfSaLL/PlAly5s2FWWL7fO0X8BE12D7ib4RCRV0iJSbUQife7rr5uDjrxUuVxyCeelMZKWFt6qSJMm6VfqscoNr/Lll9Y3NS1+VY1MmyZH4omKlQ5dqlxMRD/bolOCjdBFxtpqgYtwTrxoFR8vL6aZM3lS9OabzfvCWdbNSrZoZdDHjOHw+X79Qvs8P/+4/Nw2SXBk+lzHRNeguxndVa8OfPsth/yKEBnrL77Qj3aNRr9zZ/YD16zpvB1LlrDv7sYb9fV6dTG9/z7L6kQGPRzcGvRg2RZVatTQv/fLiN3KaPulfZLQkSN0x/hrhF61amAmu2xZ4K67rMsWLMhGX6tFvftufZkCBbhMyZL8vmRJXkzYSOnSXE4URfnf/7IvW2vQK1UKaNv9QqVKenmn0UCXKmU2xlqCqVxatmQ/pnG5vBIluO+c3Awijaj9xn6RxB+iEXr58vZzPhcoYenQ3dK2bFlKTk93VvjMGZ4gbdJEr9awY/lyNloitQPAk7LTp/M6ocaIRyu6dePJGHWy1muuuIJvSsZJ1uuvZ0WK1WRkMDp3BrKz9Sl07WjWjNMVx2vCo/vv5z7cuzfWLZF4zb59LFy49toLNkgsWjp0dzRs6Lzszp3ANdeYw+7tuOoqcyItLZs386o72tnyYASLoPSCSLgFZs92FyQUbIS+bRvfBP2qLBg1itP7ShKP2rV5wZcL1Ji7wQfPyRruvjsg4QumQ8/KAm65hTXkKkbjm5/PZSZNsv/cSZOA7t3F/mWRoXvjjchrYEuUAG691Xn5Z54Bhg8PvC9WTO+Omj+f++L4cfHxy5dz+mIr7r6b0/lmZem3L1zIfXf4sPO2RoLSpcWuFWO/SOKPXbs4n7/22hszBrjzzti1yadE1xEcTIc+f34gL0iwbIv5+aw91vp0RcZ3zhygfXv+38q9tH27dUSqyK+enMyPgV7w/ffi7/j339auIxF//snBRCqjRnGelUce4ff793NfaIMztJQubV//ypXi7fv3c9/FWoc+bRo/1RnXoTX2iyT+mDuXI7yPHAmk7Ni4MX7TVESQ6I7QraIURTjNh+5Eh2405G5cHKNG6fXc6vFezT3UrSs23Kmp9u6jYEyZAvz0U+B9MJXLBx9YL/enxa+qkVmzgM8+i3UrJJHAapFoqXIxEV2D7sQYhJoPXaRbtzLowT5bS6NGwKWX6rd5KZmaPJknakXk5bmrK1g+dLs6x41zlzfH7rP9hp/bJgmOTJ/rGH9p77Qn7KKLeITZrp24rGjE+eOPHHlprFMtc/PN7AcuX976c43MmcPHd+smrjNcRo1iqaab9UNFOM2HHqoOvUgRVs0Ynyb8MmKXOvTExWqELgOLTPjLoCclBXxk5coBPXpYly1QgGV26kSYonDKWCONGwMVK/L/RYuKtasVK3JdIj78kPOfaA16zZps3PxEnToBvT1gNtBly/LThpV+PpjK5ZprOJe8Oh+hUqYM950fdPmi9hv7RRJ/iEboVavyoE+iI7o69IoVKTk11Vnhkyd5YrB9e2cBBPn5rENu2FC/2r2WTZvYT/zYY0C1as7ace21XLcb+aQbOnRgY2tcyLhTJx6ZhPq5bdvyjefnn52Vr1+fc7PbKV38zH33cebH3btj3BCJ5xw4wEngOnTwx8AhBvhTh25laEVs3cqjYqcBPTk5HM1pJ1HcuhV45x13qV5jmQ89nM9NTnZuzIHg33PjRpZSaida/cSXXwbPqimJT2rWBK688oI15m7wlw69d29gxAj+P5jKBeCFldVMiFblb7wxuFrk2295RJyba94nki2+8YY5zYDXlCnDunGnPPooa66tWLyYR+C7don379oFfPWV9fF3382SR6Pf8rffuO/cLBoSCQoXZpeakWD9IvE/GzcC33yj/31+/jmnopDo8Fc+9GXLAmWCqVwATpyVkmJffulS1icD1hNne/eywRPtFxn0LVuAFSvMZUPhl1/4hmJkxQpr4ytizRpeKk/llVf0N7LUVO6LjAzx8QUL6tdnNWIVhXn0KPedlb49Wnz7rXjREWO/SOKP2bN5gRVtLv+UFOdpLS4gomvQrRZXEOFkhK6VLlmVF+lV3bgyJk4Evv46eJ2hYrWwxNGj4enQp07Vr1MaTOXy/PPAd98Fr9evqpHffotf/7/EHqv0uVK2aMJfLhfAuQ4dYCOllrMq70avKipXty6/Qq0zGOPGOQvocUI4OvQJE8Jb2cfPPy4/t00SHJk+1zH+yoeu3d+iBbBgAadttSuvGvLixTlEuFEj6zK9erFroEgR5+2aPNm8vqeXGtixYzmbZLh5KYLp0FV3Sqg69PLlWXlknNj2y4hd6tATF9HqZVKHLsRf08bNmwcCV8qV40lPO9q0CSxWUagQ0LWruUzr1rwSD2DtJ65WjcuJGDmS26I16A0a8CLVfqJRI/2koNFAlyvHUkZtwi4torkCLV27cmpi4w22QgXuO9GEZLQRtd/YL5L4Q/T0XauW9W/2Aia6OvSqVSn5yBFnhY8cYR9w587OdOjZ2bycW4sW1gEHa9awP/yFF3hRBie0b8+BR3PnOivvlnbt+PvNmaPfftVV/CSxYEFo9bZoAVx+uXM/fKVKvPycaKFlgA2+avT9OOq97z6eVJfSxcTj8GF+tWjhz2svCvhTh167tvOyGzYA/fuzdtwJGRkcKTp7tnWZHTuATz4BTpxw3o5gI1cvENVfpIjZNeSGdevcTaoWL27/eWvX8tONX/Ohjx8vjXmiUq0aPxleoMbcDf6aFL3pJuDNN/l/JyqXa67h0Hy78p07A++9Z/+5X3/N7htROL9ogYs33uC2RpJChXg1I6c88ADw0EPW+//5hx9RV68W79+3D/joI+vjrXT3v/7KfefXCM1g/SLxPytWBNZJUPnyS76e3SawS3Cia9C3b7ffv2EDsGcP/+9E5bJuXSAvuVX59esDy5JZuZcOH2ZDJ9oviqDcu9c7bfMffwA//GDevmGDu+XU/v1X379DhvCEq8qZM+xyOn06tHZaPSmdPMl9Z1z4ItqMHQv85z/m7cZ+kcQfs2Zxug4thw/z9SyVLjqia9Dd3E290qGL5E1WNwnRxTF3rjkfupeSqVKlxMmjDh3iUUio/PyzfjQeTLb44IP2OnS1z9z0XTRZsiR+10OV2CMaVInWQ5D4zOWixUsdunafKB+E3WdUrWqelPVSMjVypPmG4QVuZYuTJ1u7Y4DI5JuRSJwgcnsGC5S7QPHfAhcqHTuy78y4uISxPvWEVqjAy4317Gldpm9fjlZt0sR5O774gvOsG+v0amTw7be8XqLXuM2HHmwxbDU7ZVKSN+3zGjlSS1xEMRLGBW4kAPymQ7/88kBe8nLlzLm3jVx5JafLBVihIZpE7NjROte5Sp06PMEquuF8+ikf37dvYFujRlyvn2jVSq+xF+nQr73Wen3NYNkWb72VF5423mCrVOG+s9K3RxNR+439Iok/RNdm3bqcFE4+IeqIrg69Zk1KPnDAWeE9e3gR2F69WCMdjDNneCR9xRUBI29kxQoecb/9tnMd+qWX8ipIU6c6K++W1q05OGrmTP32jh3Zvx7qQrhNmwJ9+ogTVokoVgx48klrRVBeHktDS5TgzIZ+Y/BgdhklJ8e6JRKvSU3lQL4GDWLdkpjhTx26UyMKsO558GDnSo/UVGDgQPt8JCkpLFF0E+UZzBXhBaJRRrlyvPBFqGzc6NyYAzxPULq09f61a7lNv/4aepsiydix0pgnKhUrXtDG3A3+mhS95hqO4gTE+RuMXH018OKL/L+VyuXaa4HnnrP/3C+/ZLeKKAWs6HHvrbc4jD6SnDlj/aQh4q677HO0b9rE9f3+u3j/vn3ASy/Z1y9izhzuux07nLc1mgTrF4n/WbAA+N//9Nu+/pqv51BluAlKdA16sB99SgqH/AMBA23nI0tJYT0qYK1y2b07UMbKvXTyJGuVneZDP3bMOwOWnCxeBWjXrkBfOGHfPv0iEz176tUzOTncZqt86MGwWsAiI4P7LtZrrI4cadYqA+Z+kcQfM2eanzbT0vh6loFFOqJr0J3464PpyrVoJYmR0qGvXGleycdLHXqBAuLvuH+/ezmjtk2//qpfUCSYyqVHD/s0vn7Xoa9YYT3fEOu2ScLDTocuVS46/OVy0Z40pzr0YPnTtbLFIkU4DayVBEpEmTLmwB8vdehvvQWMGRN+PcHS59r9AIg4R4vdilLxqkP3e/skwRE9JcvAIiFhG3RFUQoqirJGUZRZDgo7r/jGG4HNm+39yFrDWrs2pwLo1s1cRj3pffpwYi43vun33uOgG6s6w2XqVGDePG/q0pKX586gA/bnR01THEwCKpF4jUiYIHXoQrzQoT8BYAuAMmHXdP31QLNm/H+ZMvwKVr5xY/6/aFHOp26kc+fgwTBJSXwjELk+xo5lvXu/foFtl14a+eRcbunY0bxikVZ/Xbo00L27WGnkxL3Vpw9w7pzZoFevzn1XqlTobY8kxn6RxB8il0v9+nw9+1FCG0uIKOQXgFoAFgC4DsCsYOXb1KlDjtmyhWjECKLUVGflT5wg+vhjom3brMssWUJ0221EBw44b0fdukT33OO8vFuaNyfq0cO8vV07oq5dQ6szP5/okkuIPvnEWfnsbM52/uab1mWysoj27SPKzAytTZFm6FCiTp1i3QpJJDh1iujo0Vi3IqYASCYHNjlcl8tIAM8BsHzuURRlsKIoyYqiJB9zU/Pq1cCwYcDx487KHz4MPP44Z2CzYu9ednGcOeO8HbHSodeo4U63b6xv82Zg6FDnxyQlcfoEK9auZbeWduFpP/HJJ/5tmyQ8Spd2tsiNJHSDrihKNwBHiWiVXTkiGktEbYmobeVgEZ/t2wNPPMH/O5kUvfpqDj6yK3/ttZwT247PP2fjKZL0iR733nknsFReuBQrJl4i7eBB+0AfI7fdpl8mz8iePZyPZcoU877ChVkC9sgj1sdb6dBnz+a+27LFeVujyW23mfP7SOKLn34CPvhAv+277/h6PnQoNm3yKeEMPa8AcKuiKLsBTAZwnaIoE22PSEmxr/Ho0UAUp5OJutRU1pDblT9+PFDGijNneITvVIeekeHdhbRihXnSFdD3hRNSUwMrMWVn8xJ2EzWng4h17ZmZobXTqg/PneO+y80NrV6veOcdTgFs5MQJdytUSfzHrFnAxx/rt509y9dzrK87nxGyQSeiF4ioFhHVA9APwEIishjGuapY/9fO3aFVubjRobthzx79QhFe1On0cydMcHeM2qa8PM48qc2bY5c+9+xZXpBbNHpX8bsOfd064K+/YtsGSWSQ+dAdc2Hp0EuUYD+wKCe6FQULmrP1ealDf+EFjnIMF1HfOZUt5uay/1ld/SlY/U62SyReIWWLjvHEoBPRIiLqFrSgmx9/375sYOwWltYa1ksu4XD5G2/Ul9Ea/V69eGK0fn3n7XjuOXMEpZcj9FmzgKVLvalLRQ2HdqtDt3saqleP/6oyUb8hR2qJixyhO8Zf+dBvvTVgMEqWFC/NpuXmmwOz30WKiA31TTdxdKgdjRsD/fuLR+4TJnACoDvvDGxr3pzLi/zrseK66wKGXDXa2ieLEiX4O4g0+U4Sod1xB08wG/u4Vi3ui3AyQ3qF6Fxo+0USn4h+Zw0a8HVXokRs2uRXnGgbvXq1adDAufBy1Sqi114jSk93Vv7gQaJ33iHavt26zO+/E914ozsdeqVKRA8/7Ly8Wy69lKh3b/P21q2JunULrc70dNaxT5rkrHxqKuvQR460LnPmDNHmzUSnT4fWpkjz9NNE3bvHuhWSSJCVRXT2bKxbEVMQJR26O4KNlLWsWgW89prz9Jj793Mq3W3brMscPMhh9qI0uVbESofesGHAzeGWMmWAf/7RR7faUbAg0LKlvdZ33Tpeus+vE48jRnA+GkniUaQIy3slQYmuQQ82gdG8OTBokL5sMB1679725a+9ljMJ2vHpp6z5FskERf67Dz5gd1BOjn29TqhYUeyu2LTJ3Y2ne3egSxfr/Wlp3OZPPzXvK1uWA7K0biUjVjnFZ87ksP/16523NZoE6xeJ/5kwgVcZ0/Ljj3w92yWUuwCJrkG3U1EArAc/d47/dzJRl5UVMHpWOvRz58xljOTkWOcJL16cRwjG8pmZ3kzILF5sTs8LcP1ZWc7rOXs28D2PH+cRt3bZvAIFuM5Q85ZbtSUvj89brNUGL70kDn7S9oskPpk7lxdT15KXx9eznB/R4S/ZohYnI3StysVqYi/cfOgHDvDjvLFOq/JesWuXPjDIDdnZ7CLRBtTYqVyOHwfatQOmT7eu0+869G3bOD2BiFi3TRIexlTQgFS5WOBfg+4kUlRrrK3Ka41+2bLsBzZmaHOrVPFSAzt0qDmsOVzc6tCzs3nlpGM22XbiVYfu9/ZJgiNye0odupDoGnQ3P65Bg9jva5f/RbtiUfv2bJA6dTKXUY19jx7sm7bTtht58EHOG2GsE/BmdLBwofeLG7s16E7cW2oO+SZNwm+fROIGkTBBjtCF+EuH3q9fQCddpIjZd22kR4+AdrxwYbHxv/XW4DeSZs04yZfo8yZP5tXuBwwIbGvZkssbI0hjyS23BCZpRQa9UCFgyBBuuxEn7q3+/YEOHVh3rqVuXe6LihVDb3skueUWd3MREv+hKObfWoMGfN25Uc5dCDjRNnr1atOwoXPh5dKlRM88w/pnJ+zaRfTCC0Q7d1qXmT2bqGNH1qw7pXhxbkekaNyYqG9f8/YWLcR50p1w8CDRddcR/fqrs/J79rAOfdw46zKnThGtWEGUlhZamyLNCy8Q9e8f61ZIJBEBvtShB1uBSEtyMvDhh/aqDKLA6HLPHuDddzm036rM0aPA33+7G7GJotTUOr163BONjFu2DD3Mvnp1YMECcxoEbV9oKVqUszNWq2Zd59q1wGWX8aLZfuSdd+wXuZYkFl7/BhMEf02KNmwYcG04cQN07gxcc419+S5d2FjZ8fHHfJwozarIoI8cyY+AbtLbWlGnDlC1qnn733+bb0523HADL7dmR4ECwOuvm7dXrQosWcLuCSsGDhRvnzmT+2eVbVr82OGkXyT+ZtQo83U7axb/Bv163cUIfxl0LU4m6kQqF1FWNmMZN1SpYl5oQjXwXowO5s0z53oOl507eS5ilmHd7nCyRPpdLfLUU7zuqSTxmD+fBw5a5KSoEP8Z9GDpcLWIdOiirGzh6ND37gVeftlcp1V5r9i+HZg0yd0xanvOnWMdu3ExC6sskXv2sHvHLnTe7zr0PXu4z0TEum2S8JDpcx3jL4OuNRaR0KFXrsx+YKOaJZY69HvvBd58M/x6RPnQjcoArcxTS1YWB+bY5c3xuw7dymj7pX2S0JHpcx3jL4Ou5dlnOazXLj2m1kBdfz2PTC+/3FxGPendugHLl7tbfLlXL3E+dMCbi2n5cl7Q2UvsomZD1aGr+vOmTcNvX6SQxjsxsdOhyxG6Dn/p0O+/P6BzVpTgP9A+fTiPCMAnWLTYcu/egTJWtGoFPP20OKPbTz9x0jBj+WHD/JUBrk+fQB4cK4P+1FPAlVeaj3WSD33AAFbeGCdwGzTgvhBN7PoBbb9I4pOSJc2Dp3r1+LpzMzi7EHCibfTq1aZNG+fCyzlziIYMIcrJcVZ+40aioUOJdu+2LjNjBlGzZs516Pn5rM8ePtxZ+VBo2JCoXz/z9ksvJerTJ7Q6d+xgDfuKFc7Kb9zI33PKFOsyJ04QLVjAf/3I8OFEgwfHuhUBNm3iPl22LDqfd/31Mh98AgNf6tCDkZERyIy3ejXwxRf2bo2zZwNZElNSODWsMR+JtsyJE8CGDea0t9nZ7D82Pr5Z+eVzcoBTp7x73BM9ibRq5U6HnpkZeBJJSuIni/bt9WVOnRJnHixVild2shvt/PMPy0SNCbByc7nvYp317rXX+Hoxou2XaKKmczCqMyLF779H77P8QG4uX8+5ubFuia/wl0Fv04bdLoAzlUu/fgEXglV5bRmrm8PYsRz0ZNShWxn0r7/mRF+HDlm3zSmXXMLh80aSk61VGyJ692aDa0fdusDzz4u3z5ljr9cfNky8fd487rvVq523NZr06cPL0EUbNadQgwaR/ywv8vL7mdde45eWpUv5N/jnn6HVOWkS8M034bbMd/jLh67FKx26G9miiKQkoMdid9IAACAASURBVEIF8fFeTIr+/HP4dRhZuZKDhKZM4QU+VKwmRZ3g9wnHIUM43/6cObFuCaMuWuJF8FkwTp2K/GfEkj/+8D45l7qYyz33hN4uH+KvETrgvQ7dTTCN8eIoWBDYsYNT3GqJhmRq61bghx/cHaO2JyuLXU/Gx1Erg75xI1CzJo+2rQi2DF+s5WNHjvASgyJi0bbJk/mvVZu8RHvTiPV5iAQyfa5j/GXQtSetcGFeLcipDl1ROKOg3Qi9Rg2WNxrVMLHUoffqBbzySvj1hKtDP3jQPseN1KG748AB/vvEE5H/LHWEXrhw7OcyIoGdbDHUG1iNGsADD4TXLh/iL4Ou5aWXzJGORrQGqmdP9iUaJYbaMjfdBPz2G4fzOyE7m/2vxohNL0foGzbwhK6RefNCz1MRCR262q/G/vUTfrm5AJzLv2xZd7n3Q6VqVV4kZcuWQDrpRMIusCjUQVXt2kD9+uG1y4f46+wPHepOz3z77YHkXOGUadcOGD7cHMSUm8v+O2PWwpYteZJGtLizW6xuCjfc4K6eAQMCChYrg/7ss8DFF5uPdeLeGjCAg4uMec8bNuS+q1HDXXujxYABsVG5pKfza+5cHkhEkurV+dwmKlWqsBZdS+3a/BsMddL5oYeAN94AnnzSXHc840Tb6NXLlQ598mSie+5xXn7FCqJ77yXav9+6zKRJRPXrEx065KzOjAzWEr//vvN2uKVBA6K77vK2zo0buc6tW52VX76cv+ecOdZljh0j+ukn/utH3nmH6OmnY92KAD16cJ9ee23kPys1lejzz4lKlSJauTLyn5cIfPEFnx87e+EjEJc69KNHA9LBtWsDE0tWnDwJHD7M/6ekABMmmGf8T57kCTOA9dIpKWY/45kz7EM2brdyRWRmso/UTxrY1NSABv/SS3mV9EaN9GUOHxanCK5QgZ9k7HTof/3Fbq1Nm/Tbz53jvrPLWx8NXnjBvJg3oO+XaNKqFf+Nhspl0iQecWZksKvnQiA7m3+DoriKYOzezaooIOH6y18GvVMnvjABsd/MyNChwXXoQ4cCV1zB/1u5N775hlUex4/rt1vVOWMGpyjYtcu+fU5o185seEPhnnuAm2+2L9OxIz9iGmnYkCWOouXpVN56S7x90SLuO7/q0J30SyQYPpylcdEwGNqbRjRuINHmsce4P7WsX8+/wd9/d1+f9gafYP3lL4OuhQQLSxiJtA69QAGgRQvzJKqXk6KTJ5vT84bLr7+yQsi4+HQi69AHDIhNAJEdZctGx2AkukH/6y/zgCGc36D2JptgI3R/TYpqcWLQI61DL1XKHOqu/Qy/amBzc8UJqawM+t9/A127cui41QSy3/Ohnzplbcxi0bakJH6CK1TI2bUcDunpLFnMyUlMgy7qv3B+g6oRr1+fF4BPIPw7Qi9VKrjipWDBgB+7SBFeAdyovdauWFSvHuu+jVkSQ9Whe2Eorr9eHI4fDm5lizk54lw2WqQO3TlEvODGgw96nxpZRHo6ux/uuSf0dWj9jEiHHs5vUDXoS5Yk3PKE/jXow4eL9dlaypfnSU+AJ/VOnDDLmLQulxtuAKZPN4fyW5GWxj5uY8Smly6XnTu9yQmjxc6giwJPnMgW27blv1KHHpwzZ7ifL76Y5yci3a5Bg3iB9AkTIi+RjAVeL3BRrhwvdJNgo3PAby6X554z65zt6NWLDbjdI22/fsHvwpdfDnz4IT8VaMnJYT/00aP67S1asKLCixzgXrkDHnggEIhlZdD/8x9x/zoJLOrfn90Ixh9Bo0bcd6IEY37ggQeir0NXR4Cpqdw399zjPJgtFLRzB3l55qfUeKdBA3OAVvXq/Bts1sx9fX378ispia9rqwn/eMSJttGrlysd+tixRP37Oy+/YAFR795ER49alxk/nqhSJec69CNHWKv62WfO2+GWunVZP+8lq1cTPfywc43tb7/x91y61LrMwYNE337LfeJHRowgevXVWLeCUfPLDxxIUcmJvno1rwPQsmXoOfQvRKpVIxo0KNatcAQirUNXFKW2oih/KIqyWVGUTYqihJ+0YvfuQA6MDRtYrWHH2bOcWErVl0+fbp4MPHyYE2wBvO/4cfOoOC2N19Q06sqtXBGnT3PyLD+thHPgAPcfwBro0aNZTqhl927OSGhEzWth98SxeDFw993cT1rOnOFtoeiBveTpp4HXXzdv1/ZLtChRgkflbdrw+0hPVPbowVGTxYsn5qSoiKws/g2GkmnyiSeA7t35aTPBVC7h+NBzAQwjoiYAOgB4VFGUJmG1plu3QDIjJzr01av5kWvZMuvc5cOHB8+HPmUKTyYZA1Cs6lywgPOYb9li3z4nXHutN37phx7i3N8A911urvn79ukDPPKI+dgmTYBx49jfa8Vnn/FfY51//81951cd+sMPc674aFK/Pvuz1ZzokTay6ekskYyWTDLa3HmnWYeeksK/wdmz3de3fTsP9KRBD0BEh4ho9fn/TwPYAqCm/VGuPiC4Qa9Uif8eP24/ERiqDr1wYb4ZGCMovZwU/fprHl16yaRJ3Hb1yUTFCx16rOWJVvTs6R/FgtpH0ciJnp/PT4yJbNCTk4F//9VvC1eHXq4cvxKsvzxRuSiKUg9AKwArBPsGK4qSrChK8jEnIdjaQKFghrdyZf577Jj1aDocHXqlSrwySo8e5joB/+nQ1fa7lS3Oncv7VphOX4BgBj3Whj4nxz8r93z+OUtjVZdcJEeBGRnc92XKJK5Bt5MthqpDL1eOYy+MiffinLBVLoqilAIwDcCTRGRyaBHRWABjAaBt27b2v3qtMa5cOXgmtXLl+EQfO8aP/XXqmNOHanXoF1/MfmA/6dAvu4xTE3z0UXj1iPKhOzXo+fniH42o/lCfdiJNrG8oWtLT2cdbowbPWbhRboXyWQAb8xtvjKyaJlbk53uvQy9XLjq56qNMWCN0RVEKg435d0Q03Zsmnef11+1HjACf5IoV2eVy110czKGO2rVl1JN+3XWct8Wp/vTgQb5RTJtmrhPwxogcOuT9CC4Ugw7YG+fLL+e/UocenPR0HlgUL84BP8WLR+6zypcHfvyR15Pt3Rt4883IfVas8FqHfs01gQlrdTCTIIQ8QlcURQHwFYAtRBTm8PI8r77qPsf4p5/aa6D79+fgIDuuuopXjDd+dnY2KziMM+lNm3L5evXctVWEVxfTI48E9NZ2OnTRAghOdOh33MGTfcY+atyY+yIpKbR2Rxptv0QLdQSoKHx9VqgQWMPSa0qVAm67jf/Py+NrtUyZxNKit2hhvr4qVeLF3Tt0cF+fumDNRx8BzzzD10ckb7rRxIm2UfQCcCUAArAewNrzr5vtjnGlQ//gA6K+fZ2XnzGDqEsXovR06zKff05UtKhzLfWuXawjHj/eeTvcUqsW0f33e1vnihVEzz1HlJbmrPyMGfw916yxLrN3L9Ho0c41/NHms8/4mvED/fsTJSXx/y1aEHXvHrnPOniQaN48otOniSZM4PO4fXvkPi+RGDOG++vgwVi3JChwqEMPeYRORH8C8PYZd/NmXu8zKYk1psuWBT9GDZ3fs4eXlzNqyfftYx9769Y8ghGtm3n8OB/frBnnhFGxckWcOsUj98aNgdKl3X3HSLFjBz9RNGkCtG/PLyPbtvF3uuQS/fakJE6ra3RXaVm4kEe7zZoB1aoFtp86xQqERo1i2xciOSag75doccMN/BQHRH6ictEiHv1v3hwdVY1fyM4G1q3jp2S769bI9u08Z/XllwHXa1qa/VoAcYS/crn06xdYSsuJDh0A/vtf9h1aqVxGjgSuvpr/t3Jv/PQT5yoxhvhb1fnPP2wwRZkY3XLrrcFdQk545pnAY/25c3yRGv3ljz4KDB5sPrZZM+B//zMHImmZOJH/Gtd5TU7m9q9ZE3rbvcBK5aLtl2hx333Aiy/y/5GWxmknRVWDHkqwjZ/p0sUcNHb0KP8Gf/rJXV0nT/IAr1AhvUFPEPxl0LU4TTlauTLnzFCTTnmpQy9RgpMdGQ2dlyqXzz4LLOrhFePG8WSZcXUiu2yLZ844y7bo1wmk7t15LsQPnD4duB4jPUJX61Zli9pticKmTcD+/fptoU6KqsZb1aFrtyUA/jPobnToAE+O5OezUQe81aHXqAHMmWNeOMFLlUskcKtymT6dJ9e2brWu0+86dDui3bYmTTgDIsBGNpIG49QpngAtWTJxDbpIUqu+d6tD1xr0OnVYulirVvht9An+MuhaY1y3bsAPaYfqPyPitTSNs/vaEXrTpuxr9UqH7kVgUePGvMSWl7hNn+tE5WJ1E/OLVNCKWLRPDcUHgA8+MLvyvP6sMmX4e1apwjldWrSI3OfFAjsdejgGvUYNdsmGkrHRp/jLoGt54w1g1qzg5VSDfsstnKirZEn9fu0I/Zpr2MXhdPJu+3Z2t/zyi7lOwJuR3+nT3if5ioQOXV3JyM86dD+QlxcIxQdYDle4cOQ+77HHgKlT+f9SpTjnid3asPGIlzr0evVYyqyuiXDuXPRlrRHEX/nQ33vPbJCD0bo1G34rFUO/foELPC+PL45ChfQXSKdOwPffmxe+yM7m4CKjwb3kEi5/6aXu2irCK3fA008H16E/95z45uFkhN67Nz81qflzVJo04b7w60o5Tz0V3R+sOiGp+mdXruR8PW++GZmI0UaN9IuMHzrET6mJFDF69dXm66tMGb7u1AAhp9xwA79U1IjR998Pv50+wF8GXbvayosvsszOGKVppGJFHp1/8w3n0PjjD5Y+qrRoEXgEHT0aePxxlilqf1xJSeLAGCtDV6UK3+W9wgu3gKrkAViW9eabegmmsYwWqxuAlkKFWAKamsqPqipVq3rbF6Fy++3im5XVd44UWtUJwFkBx4xhhVEkDPq8eTwIUjOKtm7NWUu//NL7z4oVIhtQtGho151xbi7BMi76y+WSnMx50AHWl2/aFPwYIl7ceM4csW49JYU11HYcOsR6XqNBsHJFpKdznepErB/YuJFHgwCH6b/8stmgb9woTqfQrBnwyiusjLFiwQKONN21S789LY37Tl0KMFbcf79Yi67tl2hQqhT7sdWRY6QnKp9/nv30KmXLJp5sUURODv8GRfn97bjrLr1LShr0CDJoEBsiIHiyKBVFYZ3xlCmB91rGj+c8F3b8+ivnJT9yRL/daoS+eTPX6YWh6N8/MLoKh1df5UUqADYe+/aZ3TmvvRYoo6VlS56zsFtrVZ1HMEoh163jvvNCkx8Op06JDdnw4Wzso0WlSvyZ6lyDatAjZTS0E7Dq5yWayqV5c/MycZmZ/BtU5w+ccvKkfk5DGvQo4TSwCND7da3kTaH4qsuW5Ud5o6zJy0nRESN4dRsv+fhjlmQZJ0CtJkUzMznhv0gBo+J3Hfptt3E61FiTkcHzLlodOhA5I6vmblFJRIOekmIeSISjctEm50uwnOj+NehOdeiAPvTXeIyajCoUJUm9ejzyN0Zy+lWH7iQfushoT5rEoc/q8n8i/G7Q/cL06ayMUpe9K1uWR4SRWK6Q6MIYodvp0EMJLNIa9HvvFT+1xin+mhTV0qSJc3mhOkLv0MFs0NW8JRs2sF/zP/8x69Dd4qUOvWZNnsT64gtv2gRY+/4LFhS3ORwdul/wS7uMk6LVq3P+oEjo4c+e5dxFWoP+wAP+mtvxAtHTulcjdD9M6HuIfw26m7zOlSqxpE40Kdq2Lf9duZKVBm6WKVu9mv3DU6boH+e9HK3auTlCRRSIAQTXodsZ9C5dOG+Gn3XofghyMhr0SLapaFFg1Sp9sjQ/uJ28xssR+r338iLqKmfOcOBX/frhtdEn+MugjxwZWl7il1+2fqStXZt16pddxmUyMwMrHal07syTfsasbbm57KM0Gt2LLmJljRdJtQBvfvQvvMD+W4DbKzLOw4aJJwidBBZ168Z+ea1kEeDo25kz/Rttp+2XaJCWxjmAtBNvTzzBMQuixGjhULAgyxS1pKayu6dVK2eignige3dzzEeRInzduY1/ePdd/fuPP2aJ9Nmz4T+5+wB/GXQ1GhHg1dqPHGGfZDAuvhj45BO++xqz/ikK69QBYNQoThN74oReolenDr+MWGVbLFeODZwXeOUqUJ9EAP6+2lGbilUEoROXy9mznGWyVSt9srKKFb3ri3AYOFCcbVHbL9HA6NMGeGX648e9N+gHD7JR6949cKOdOJGv8dRUe9VSPPHDD+ZtBQu6v+7y8nhQV6JE4DetTdAl+s3EGf66hS9ZwkYDYNndnj3OjktJYUmelXRu1y7gww+tR2r79vEPw5ga1sqg5+cD8+c7y9fuBC9G6MnJrAcHOLDo8cfNZdau5UAUIx06sJa5VCnr+hcuZDfYjh367amp3HdOFgCPJHfeyTd0I8nJHGwWLW6/nSWgWiI1UblxI2fq1MYGqIqXRJsYNUIE/PwzBx86ZccOvsa//z6wLcEyLvrLoD/1VMB37lSHDrAW2ihr0rJlC+dZX71avH/hQs5L7lSHrig8+aQN6AiVwYPN2RxD4b33gKFD+f/Dh8UX+mefiV0urVpx/9ilXVCN4uHD+u2bN3PfrV8fWru94sgRc9sADulW+yUadOkCPPigflukDLrRX6/9P5EMerVqfH1ryc8HevYMxJ84QZuYS0Ua9CjhRocebMUS9bFbHf27qfeBB8Q69Ftu4RWSRCsgueHNN4G+fcOrw8gHH4j9+1aTomlpnIgsnnXod97pfT+GwrZt7ArREimDrgZSGXXoQGIZ9CNHzE/PoUyKSoMeA9zmQweCG/SqVXly1Jgk3+qzVS66iBeLECXhuuUWniFfssRZG604cyb8m4KKVodupXIRGe3x43kewi5kXOZDd8att/KTppYaNdwnnXPChTBCt3tKBtzJFtU+0Rr0pCQWY2gTnMUx/jLoWgPevj37gp1gzAAoQh2xvvVW+PnQAXaTFC3KE17hUL++2QCEglGHLjLoVjr0s2f5rx/zoe/bx5LTq68OfkMWEW0po2hS9LPPvJtvMX4WoI/XSEriRHVaaV48YzWPpW4LZYSuFURUqcIqpASRLfrLoGt54w3go4+clS1Xjo3VSy9Zl2nblqVkjz3mXJ60eDEbMtGkWsmSrFFfvtxZXVpSUgIyy0iMHt3o0LOz+SmkRQv9o7sRVSkUzcUTiIBevTih0l9/cdZCvyMy6JHiqad4fki7qEv58sDdd/MTaSJgp8BysxoZwCqvF1/Uq3+IeB7ILko6niCiqL3atGlDtiQnE61bZ1/Giq1bidLSrPenpxMdOUK0ezdRXp5+36FDRH/8QZSZqd++cCERwPtEHD1KlJvrrp0rVxIVLkzUqRNRTg5RpUpEjzzirg4RGzcSrVjB/z/8MFHlyuYy27cT/fWXftvo0fwdZ82yr//IEaJFi4gyMvTb09K4f1JTQ266JUuWcNs+/5yoZ0+iihXN50jluuuIrrzSvF3bL5Hm3Dlu71tv6bfPmkXUtStfg5EmP59o6VI+19Fm6lSihg2J1q71rs7cXKK77yaaMcO8b/Fiol27rI89cYJo8mT7+vPz+ff4/PPhtTPCAEgmBzbWXwZdZd8+ot69iXr1cv6NX3+dqFkz+zIjRvBXdvrDWrCAyy9a5LwdWlJSiLKzA+/z84maNycqW5brff55NlJeGHQtf/5J9O23wcvl5xO1b8+GMD/fvuyGDURPPsnnJlr06kVUoQLRmTN80wCIxo0Tl502jejHH6PXNhFHjnAbP/lEv33cON6+e7e3nzdlCtE335i3Fy1K9Nxz3n5WMLZsISpShL/nP/9E97NF5ObyTbRwYTb6OTk8ADt+3Fy2cmWihx6KfhtdEL8G/fhxoipVuGlXXeX8G9euff7r2FCjhtig79pF9N13RKdO6bf//juXX7LEus733ycaOFC8HSBq0YJozZrA9i1b+IIfMoT3A0SPPmrfbicsXUo0c6Z9mVWriL7/Xr/t7FmiPXuC1//JJ9zWBQv0248e5b47eNBde4OxcyeRohC9+CK/V2+GzZoFv/locdIvXpGRQTRhAp9jLVOnct+F+vRpRefORB07mrdXrszXV7Q4fZqoSRP+3P37A9vdnKdQmDyZr+lp04hatdJfxy+8EHi627ePr5u6dfkJwkjDhkT9+um37d7NA4rmzdkOeMG5c0T/939EAwa4rtOpQfdXpCjAkYeDB/PkpZsJLSeJ7o1yMpW//mK/4+TJHDGqKJzIKz+f/ZN27Th2jKPzXnuN/eLFinEdnToB993HudbbtQP69AG++y4QqtysGScgO3XKmzUgP/mEJ94KFeIAio4dzWHhkydzqHPr1qysqV+fJ9REUbJG1Nzve/fqt2/fDgwYAMyYwXnds7M5IZU6YZWTw/1y7hznos7ICCz+sHIlLy5StixPbFesyMdVrcp9WqgQ598BePvrr3P5vDz2eebns5wtNZXnJa66CmjQwNwv69cHjyo8fpxlrRdfzOqm06c5eEztm7w8fjVqxG09doznT86e5e9cujTPQXTrZo7QFClPsrL4Oxw4wGHsZctyBG7p0txHW7fydy5alPcXLcpqrhIlAnWkp4sFAWXLcj+lpgZWSdq6ldtfsCDXUaIEf5Z2dS/1e2Zl8fkqUkQcbJaTw/1Vrhyn6nj4Ya5//nz+Dvn5vNxhsWIc7JWZyXXWrs3XRl4et11NwXHoEPd9mzYsEc7I4OusaFHu37Ztgbff5tQVWgYO5OPPnAGuvz4QLdu3L+dJHzQIGDKEv0vhwhyoKJLzanOinzvHEeVqcFjlyvyb6dyZv3d+PreLiGNf9uzha6ZUKa7j8OFA/xYowJLLpCTuy3vu4ajX4sXZFtxyC0uMVTuwbh3Xof5OqlTh35SbCFYnVt+rl2OXS34+0TPPEH3wgfNbWM2awUfo//kPlzl7Vr99xozAaFl9HT9OtHcv0eOPE61ebV2n6gpQXw8+qN+fmso+QIDoww+dfx+33H+/vh3Nm5vLvPaavky9ejy6csKTT/IxU6bot69YYe47dWT24IPmfeXKBY7t08e8PymJ9+Xl6Z9sjJQoYT62Z09zuf79eV+ZMkTVqxO99x5vP3GCtxUrRlSwYKCON9/k/X/+aa4fIPrhB94/f754/9y55jb88w/vU0fNmzaJjx07lvcvWyber/b9okXcj4UKEd1xh/nzOnTg8tonR+13VF9Dh/K+rCyi0qXZPaHd/+qrvD8tjejSS9k1V6cOUYECvP+zz3j/3LlEI0cGPis/n+jee82f9/bbvH/PHn5fqBB/D3X/F19Y9+2oUebvWaMGu+Q++4xdKkT8hFC0KNHll/OIWGXXLqLy5YluucVcz/XXB+Zf1Ou5Z09u59mz/BRKRPTrr7yveHH99bd4Me//5hvxedu4MVD3vHlc5/vv8/X3zju8T50vMr5++YWIyPEIXeGy0aFt27aUnJwcmcpPnOC7oZouVwQR34GNCcDy8nh0e/Ys34Hz8/mOXKQIj6Bq1LAepefns/oiN5dHS02aiCVja9bw6E47wvKStDT+jGLFeARRv755Sbn0dOD333lEmZvLoymnUaqZmcDYsZxSQKs4yM3ltVzPnePPLV48ECk5YwarMIoW5XZVqMB91KUL71cXLkhP5xHv8eM8urzrruDtmTiRP7tECa63YkV++jGe23XrOJowM5NHctddxylTz57lxF1FivDorXx5Hr21acOjrXPnWP2wfz9/34IF+dWiBT9BpKfz00nx4nx8RgaPspo3N4/Qz57lEP1bb+UntdOnWcGlJjtTk8C1a8dPB2lpwNKlfL1mZ/PoNjubn/rq1+d2ff45j8AHDuTRqZYNGwJPG1ddxdumTOFrOC+P+yIzkxOrXXstf9cXXuDvUqwYv4oXZ+nwZZfxk+3DD3O76tblNlSrxlJSq4XSs7I4O2duLtdVtChf/xddxOf8m2/4nJ88ye1s145/NyVK8Ij9jz/42AIF+BzddJM5nfaePXy9aHXlAF9HJUuar4Xt27muunX127//ns/tHXfweV250tynAD/5TpnC/ZCby+evbl3uh0qVOCnasmWB/s3N5Wula1dxXh1tO9PSOE1FuXJ8LZYsyfasXj2gbFkoirKKiIImJkocgy6RSCQJilOD7l8dukQikUhcIQ26RCKRJAjSoEskEkmCIA26RCKRJAjSoEskEkmCIA26RCKRJAjSoEskEkmCEJZBVxSlq6Io2xRF2aEoyvNeNUoikUgk7gnZoCuKUhDAZwBuAtAEQH9FUZp41TCJRCKRuCOcEXp7ADuIaBcRZQOYDKCHN82SSCQSiVvCybZYE4A2xeF+AJcZCymKMhjA4PNvsxRF2RjGZ8YjlQAcj3UjosyF9p0vtO8LyO8cbeoGLxKeQXcEEY0FMBYAFEVJdpKPIJGQ3znxudC+LyC/s18Jx+VyAIB24cJa57dJJBKJJAaEY9BXAmioKEp9RVGKAOgH4BdvmiWRSCQSt4TsciGiXEVRhgKYB6AggP8jok1BDhsb6ufFMfI7Jz4X2vcF5Hf2JVHNhy6RSCSSyCEjRSUSiSRBkAZdIpFIEoSoGPQLIUWAoii1FUX5Q1GUzYqibFIU5Ynz2ysoivKboijbz/8tH6yueENRlIKKoqxRFGXW+ff1FUVZcf58Tzk/aZ4wKIpSTlGUqYqibFUUZYuiKJcn+nlWFOWp89f1RkVRJimKUizRzrOiKP+nKMpRbayM1XlVmI/Pf/f1iqK0jl3LA0TcoF9AKQJyAQwjoiYAOgB49Pz3fB7AAiJqCGDB+feJxhMAtmjevw/gf0R0EYCTAB6ISasixygAvxJRYwAtwN89Yc+zoig1ATwOoC0RNQWLIPoh8c7zeABdDduszutNABqefw0GMCZKbbQlGiP0CyJFABEdIqLV5/8/Df6R1wR/1wnni00A0DM2LYwMiqLUAnALgHHn3ysArgMw9XyRhPrOiqKUBXA1gK8AgIiyp5Rt1wAAAmJJREFUiSgNCX6ewYq44oqiFAJQAsAhJNh5JqIlAE4YNlud1x4AviFmOYByiqJUj05LrYmGQRelCKgZhc+NGYqi1APQCsAKAFWJ6ND5XYcBVI1RsyLFSADPAcg//74igDQiyj3/PtHOd30AxwB8fd7NNE5RlJJI4PNMRAcAfAhgL9iQpwNYhcQ+zypW59WXdk1OinrM/7dz/zwyRWEcgJ+TkE1QWKUo0GhRbUIhqDai0kls4QtoReULaFUqEQU2bJT+1IRErCDYkNiCVZGotngV50wyIduZO5x9n+Rm7tyZZM7Jb/Jm7nvP3FLKDtzBhYj4Mf5a1DWi3awTLaWcwlpEPJ/2WAa0BYdxNSIO4aff2isd5jyr/iLdh93Y7s/WRPf+h1yHKOib5hYBpZStajG/ERGL7fDX0alYe1yb1vgm4AhOl1I+qa2042p/eWc7Nae/vFexGhFP2vPbaoHvOeeT+BgR3yJiHYtq9j3nPLJRrv9kXRuioG+KWwS03vE1vImIK2MvLWGh7S/g3tBjm5SIuBgReyJir5rro4g4i8c4097W25y/4HMp5UA7dAKvdZyz2mqZK6Vsa9/z0Zy7zXnMRrku4Vxb7TKH72OtmemJiIlvmMc7rODSEJ859Iaj6unYS7xo27zaU36I93iAXdMe64Tmfwz32/5+PMUH3MLMtMf3l+d6EM9a1ncx23vOuIy3eIXrmOktZ9xUrxGsq2di5zfKFUVdvbeCZXUF0NTnkH/9TymlTuRF0ZRS6kQW9JRS6kQW9JRS6kQW9JRS6kQW9JRS6kQW9JRS6kQW9JRS6sQvrfHP8MzOIicAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## Plot loss and accuracy por 100 episodes, Loss is decrasing and accuracy is growing during the first 100 episodes \n", "## The reward seems to be the same but the loss function gets smaller \n", "episodes = list(range(0, 144))\n", "\n", "loss_Sarsa_Bolzman = [3.28,22.82,122.84,0.744,0.585,26.28,24.78,2.308,0.561,25.376,\n", " 1.2509,0.571,110.953,0.565,0.563,0.563,26.851,1.032,0.554,0.565,\n", " 0.656,0.563,0.751,0.570,96.41,2.667,87.50,23.47,121.085,0.565,0.569,\n", " 17.62,0.562,0.560,0.825,23.69,26.398,0.551,102.505,0.555,0.559,0.554,\n", " 0.558,0.569,0.568,0.557,0.567,0.884,0.560,0.560,0.570,117.603,0.554,\n", " 0.548,0.553,0.554,0.584,0.563,0.563,0.566,0.571,0.563,2.901,0.545,0.550,\n", " 0.566,0.558,0.564,0.560,0.557,0.546,2.965,0.559,0.562,0.562,0.557,0.551,\n", " 0.557,0.560,0.541,0.559,0.555,0.737,0.559,0.564,0.561,0.549,0.578,0.560,\n", " 0.561,23.818,0.564,0.645,0.568,0.743,0.561, 2.795,0.543,0.558,0.644,0.559,\n", " 0.550,0.567,0.562,0.556,0.570,0.634,0.556,0.563,0.574,0.557,0.569,108.60,\n", " 0.566,0.556,0.578,25.339,0.555,0.558,0.557,0.558,0.563,0.559,0.565,0.564,\n", " 0.564,0.561,0.555,0.560,19.557,0.568,0.570,0.560,0.559,0.559,0.536,0.569,\n", " 0.552,0.546,0.561,0.562,0.556,0.552,0.544]\n", "\n", "plt.plot(episodes, loss_Sarsa_Bolzman, 'r--')\n", "plt.axis([0, 110, 0, 10])\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Conclusion State-Action-Reward agent\n", "\n", "#### The State-action-Reward agent with Neural Network model and Boltzmann Gumbel Q Policy\n", "#### shows unstable learning during episodes. \n", "#### The test reward comes at 70 . Try more episodes in order to test better learning " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training for 1000000 steps ...\n", " 777/1000000: episode: 1, duration: 8.665s, episode steps: 777, steps per second: 90, episode reward: 230.000, mean reward: 0.296 [0.000, 50.000], mean action: 7.069 [0.000, 8.000], mean observation: 72.865 [0.000, 228.000], loss: 2.938891, mean_absolute_error: 0.084981, acc: 0.802872, mean_q: 1.000000\n", " 1743/1000000: episode: 2, duration: 7.822s, episode steps: 966, steps per second: 123, episode reward: 450.000, mean reward: 0.466 [0.000, 200.000], mean action: 7.844 [0.000, 8.000], mean observation: 72.801 [0.000, 228.000], loss: 23.077522, mean_absolute_error: 0.064501, acc: 0.962694, mean_q: 1.000000\n", " 2818/1000000: episode: 3, duration: 8.631s, episode steps: 1075, steps per second: 125, episode reward: 240.000, mean reward: 0.223 [0.000, 50.000], mean action: 7.829 [0.000, 8.000], mean observation: 72.808 [0.000, 228.000], loss: 2.101268, mean_absolute_error: 0.038194, acc: 0.960894, mean_q: 1.000000\n", " 3425/1000000: episode: 4, duration: 4.910s, episode steps: 607, steps per second: 124, episode reward: 160.000, mean reward: 0.264 [0.000, 10.000], mean action: 7.908 [0.000, 8.000], mean observation: 72.879 [0.000, 228.000], loss: 1.326534, mean_absolute_error: 0.038376, acc: 0.973597, mean_q: 1.000000\n", " 4049/1000000: episode: 5, duration: 4.979s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.875 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.585362, mean_absolute_error: 0.021693, acc: 0.974318, mean_q: 1.000000\n", " 4670/1000000: episode: 6, duration: 4.949s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.900 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.585000, mean_absolute_error: 0.020728, acc: 0.977419, mean_q: 1.000000\n", " 5643/1000000: episode: 7, duration: 7.760s, episode steps: 973, steps per second: 125, episode reward: 390.000, mean reward: 0.401 [0.000, 50.000], mean action: 7.906 [0.000, 8.000], mean observation: 72.534 [0.000, 228.000], loss: 3.030100, mean_absolute_error: 0.051387, acc: 0.979424, mean_q: 1.000000\n", " 6426/1000000: episode: 8, duration: 6.233s, episode steps: 783, steps per second: 126, episode reward: 440.000, mean reward: 0.562 [0.000, 200.000], mean action: 7.943 [0.000, 8.000], mean observation: 72.822 [0.000, 228.000], loss: 28.383811, mean_absolute_error: 0.067648, acc: 0.985934, mean_q: 1.000000\n", " 7049/1000000: episode: 9, duration: 4.952s, episode steps: 623, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.942 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.573570, mean_absolute_error: 0.017481, acc: 0.987138, mean_q: 1.000000\n", " 7673/1000000: episode: 10, duration: 4.990s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.947 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.571061, mean_absolute_error: 0.017102, acc: 0.987159, mean_q: 1.000000\n", " 8449/1000000: episode: 11, duration: 6.216s, episode steps: 776, steps per second: 125, episode reward: 440.000, mean reward: 0.567 [0.000, 200.000], mean action: 7.936 [0.000, 8.000], mean observation: 72.827 [0.000, 228.000], loss: 28.642736, mean_absolute_error: 0.069101, acc: 0.983226, mean_q: 1.000000\n", " 9073/1000000: episode: 12, duration: 4.979s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.976 [0.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.566294, mean_absolute_error: 0.015336, acc: 0.993579, mean_q: 1.000000\n", " 9698/1000000: episode: 13, duration: 4.994s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.947 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.571732, mean_absolute_error: 0.017429, acc: 0.987179, mean_q: 1.000000\n", " 10329/1000000: episode: 14, duration: 5.021s, episode steps: 631, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.959 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.579018, mean_absolute_error: 0.016229, acc: 0.990476, mean_q: 1.000000\n", " 10947/1000000: episode: 15, duration: 4.925s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.972 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.571800, mean_absolute_error: 0.015475, acc: 0.993517, mean_q: 1.000000\n", " 11571/1000000: episode: 16, duration: 4.979s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.962 [0.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.566294, mean_absolute_error: 0.015513, acc: 0.991974, mean_q: 1.000000\n", " 12505/1000000: episode: 17, duration: 7.435s, episode steps: 934, steps per second: 126, episode reward: 840.000, mean reward: 0.899 [0.000, 400.000], mean action: 7.918 [0.000, 8.000], mean observation: 72.831 [0.000, 228.000], loss: 109.560523, mean_absolute_error: 0.107581, acc: 0.979636, mean_q: 1.000000\n", " 13126/1000000: episode: 18, duration: 4.954s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.976 [2.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.570630, mean_absolute_error: 0.015937, acc: 0.991935, mean_q: 1.000000\n", " 13756/1000000: episode: 19, duration: 5.016s, episode steps: 630, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.940 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565614, mean_absolute_error: 0.016949, acc: 0.987281, mean_q: 1.000000\n", " 14385/1000000: episode: 20, duration: 5.035s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.970 [2.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564938, mean_absolute_error: 0.016449, acc: 0.988854, mean_q: 1.000000\n", " 15101/1000000: episode: 21, duration: 5.736s, episode steps: 716, steps per second: 125, episode reward: 100.000, mean reward: 0.140 [0.000, 10.000], mean action: 7.971 [1.000, 8.000], mean observation: 72.934 [0.000, 228.000], loss: 0.704189, mean_absolute_error: 0.018620, acc: 0.993007, mean_q: 1.000000\n", " 15732/1000000: episode: 22, duration: 5.047s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 16360/1000000: episode: 23, duration: 5.030s, episode steps: 628, steps per second: 125, episode reward: 110.000, mean reward: 0.175 [0.000, 10.000], mean action: 7.965 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.868391, mean_absolute_error: 0.023197, acc: 0.988836, mean_q: 1.000000\n", " 16985/1000000: episode: 24, duration: 5.036s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.965 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.568559, mean_absolute_error: 0.016371, acc: 0.990385, mean_q: 1.000000\n", " 17612/1000000: episode: 25, duration: 5.049s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.984 [1.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.579554, mean_absolute_error: 0.015272, acc: 0.993610, mean_q: 1.000000\n", " 18234/1000000: episode: 26, duration: 4.994s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.950 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.571306, mean_absolute_error: 0.016445, acc: 0.990338, mean_q: 1.000000\n", " 18849/1000000: episode: 27, duration: 4.954s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.987 [2.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.574594, mean_absolute_error: 0.015545, acc: 0.993485, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 19476/1000000: episode: 28, duration: 5.024s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000\n", " 20181/1000000: episode: 29, duration: 5.674s, episode steps: 705, steps per second: 124, episode reward: 100.000, mean reward: 0.142 [0.000, 10.000], mean action: 7.974 [1.000, 8.000], mean observation: 72.934 [0.000, 228.000], loss: 0.713785, mean_absolute_error: 0.018425, acc: 0.994318, mean_q: 1.000000\n", " 20809/1000000: episode: 30, duration: 5.042s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 21442/1000000: episode: 31, duration: 5.079s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 22065/1000000: episode: 32, duration: 4.966s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.981 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.568796, mean_absolute_error: 0.015890, acc: 0.991961, mean_q: 1.000000\n", " 22688/1000000: episode: 33, duration: 5.005s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.979 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.567204, mean_absolute_error: 0.015359, acc: 0.993569, mean_q: 1.000000\n", " 23320/1000000: episode: 34, duration: 5.074s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.983 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557546, mean_absolute_error: 0.014633, acc: 0.995246, mean_q: 1.000000\n", " 23938/1000000: episode: 35, duration: 4.988s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.985 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.573405, mean_absolute_error: 0.016009, acc: 0.991896, mean_q: 1.000000\n", " 24625/1000000: episode: 36, duration: 5.498s, episode steps: 687, steps per second: 125, episode reward: 100.000, mean reward: 0.146 [0.000, 10.000], mean action: 7.972 [0.000, 8.000], mean observation: 72.909 [0.000, 228.000], loss: 0.735399, mean_absolute_error: 0.019841, acc: 0.991254, mean_q: 1.000000\n", " 25250/1000000: episode: 37, duration: 5.021s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 25876/1000000: episode: 38, duration: 5.105s, episode steps: 626, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.976 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566066, mean_absolute_error: 0.015819, acc: 0.992000, mean_q: 1.000000\n", " 26516/1000000: episode: 39, duration: 5.446s, episode steps: 640, steps per second: 118, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.986 [0.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.550566, mean_absolute_error: 0.014464, acc: 0.995305, mean_q: 1.000000\n", " 27143/1000000: episode: 40, duration: 5.047s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.979 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565161, mean_absolute_error: 0.015795, acc: 0.992013, mean_q: 1.000000\n", " 27873/1000000: episode: 41, duration: 5.925s, episode steps: 730, steps per second: 123, episode reward: 240.000, mean reward: 0.329 [0.000, 50.000], mean action: 7.974 [1.000, 8.000], mean observation: 72.821 [0.000, 228.000], loss: 3.035784, mean_absolute_error: 0.040047, acc: 0.991770, mean_q: 1.000000\n", " 28494/1000000: episode: 42, duration: 5.243s, episode steps: 621, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000\n", " 29434/1000000: episode: 43, duration: 7.656s, episode steps: 940, steps per second: 123, episode reward: 440.000, mean reward: 0.468 [0.000, 200.000], mean action: 7.976 [0.000, 8.000], mean observation: 72.784 [0.000, 228.000], loss: 23.642288, mean_absolute_error: 0.054649, acc: 0.994675, mean_q: 1.000000\n", " 30070/1000000: episode: 44, duration: 5.128s, episode steps: 636, steps per second: 124, episode reward: 110.000, mean reward: 0.173 [0.000, 10.000], mean action: 7.970 [0.000, 8.000], mean observation: 72.876 [0.000, 228.000], loss: 0.869924, mean_absolute_error: 0.022052, acc: 0.993701, mean_q: 1.000000\n", " 31159/1000000: episode: 45, duration: 8.829s, episode steps: 1089, steps per second: 123, episode reward: 440.000, mean reward: 0.404 [0.000, 200.000], mean action: 7.963 [0.000, 8.000], mean observation: 72.747 [0.000, 228.000], loss: 20.407248, mean_absolute_error: 0.048227, acc: 0.992647, mean_q: 1.000000\n", " 31982/1000000: episode: 46, duration: 6.783s, episode steps: 823, steps per second: 121, episode reward: 440.000, mean reward: 0.535 [0.000, 200.000], mean action: 7.981 [3.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 27.008629, mean_absolute_error: 0.062671, acc: 0.992701, mean_q: 1.000000\n", " 32608/1000000: episode: 47, duration: 5.253s, episode steps: 626, steps per second: 119, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.986 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000\n", " 33224/1000000: episode: 48, duration: 5.169s, episode steps: 616, steps per second: 119, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.982 [1.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555952, mean_absolute_error: 0.014627, acc: 0.995122, mean_q: 1.000000\n", " 33852/1000000: episode: 49, duration: 5.272s, episode steps: 628, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561102, mean_absolute_error: 0.014719, acc: 0.995215, mean_q: 1.000000\n", " 34483/1000000: episode: 50, duration: 5.450s, episode steps: 631, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.983 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000\n", " 35101/1000000: episode: 51, duration: 4.996s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.570196, mean_absolute_error: 0.014940, acc: 0.995138, mean_q: 1.000000\n", " 35738/1000000: episode: 52, duration: 5.098s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [4.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.553163, mean_absolute_error: 0.014527, acc: 0.995283, mean_q: 1.000000\n", " 36359/1000000: episode: 53, duration: 4.970s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000\n", " 36988/1000000: episode: 54, duration: 5.049s, episode steps: 629, steps per second: 125, episode reward: 80.000, mean reward: 0.127 [0.000, 10.000], mean action: 7.983 [1.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.641244, mean_absolute_error: 0.016989, acc: 0.993631, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 37608/1000000: episode: 55, duration: 4.946s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.979 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.568353, mean_absolute_error: 0.014895, acc: 0.995153, mean_q: 1.000000\n", " 38223/1000000: episode: 56, duration: 4.909s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.985 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.572981, mean_absolute_error: 0.015007, acc: 0.995114, mean_q: 1.000000\n", " 38838/1000000: episode: 57, duration: 4.982s, episode steps: 615, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.571369, mean_absolute_error: 0.014470, acc: 0.996743, mean_q: 1.000000\n", " 39677/1000000: episode: 58, duration: 6.716s, episode steps: 839, steps per second: 125, episode reward: 840.000, mean reward: 1.001 [0.000, 400.000], mean action: 7.982 [0.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 121.950028, mean_absolute_error: 0.113348, acc: 0.996420, mean_q: 1.000000\n", " 40303/1000000: episode: 59, duration: 5.016s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 41165/1000000: episode: 60, duration: 6.909s, episode steps: 862, steps per second: 125, episode reward: 840.000, mean reward: 0.974 [0.000, 400.000], mean action: 7.980 [1.000, 8.000], mean observation: 72.786 [0.000, 228.000], loss: 118.694663, mean_absolute_error: 0.111116, acc: 0.994193, mean_q: 1.000000\n", " 41798/1000000: episode: 61, duration: 5.082s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.976 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.556664, mean_absolute_error: 0.014611, acc: 0.995253, mean_q: 1.000000\n", " 42431/1000000: episode: 62, duration: 5.072s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.558230, mean_absolute_error: 0.015134, acc: 0.993671, mean_q: 1.000000\n", " 43062/1000000: episode: 63, duration: 5.058s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 43687/1000000: episode: 64, duration: 4.995s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 44313/1000000: episode: 65, duration: 5.025s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.982 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000\n", " 44933/1000000: episode: 66, duration: 4.991s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 45552/1000000: episode: 67, duration: 4.975s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000\n", " 46786/1000000: episode: 68, duration: 9.825s, episode steps: 1234, steps per second: 126, episode reward: 440.000, mean reward: 0.357 [0.000, 200.000], mean action: 7.996 [5.000, 8.000], mean observation: 72.733 [0.000, 228.000], loss: 18.003360, mean_absolute_error: 0.041348, acc: 0.997567, mean_q: 1.000000\n", " 47406/1000000: episode: 69, duration: 5.091s, episode steps: 620, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 48342/1000000: episode: 70, duration: 7.861s, episode steps: 936, steps per second: 119, episode reward: 840.000, mean reward: 0.897 [0.000, 400.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.826 [0.000, 228.000], loss: 109.297474, mean_absolute_error: 0.101351, acc: 0.997861, mean_q: 1.000000\n", " 48969/1000000: episode: 71, duration: 5.102s, episode steps: 627, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [4.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000\n", " 49584/1000000: episode: 72, duration: 5.140s, episode steps: 615, steps per second: 120, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.992 [5.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.572981, mean_absolute_error: 0.015007, acc: 0.995114, mean_q: 1.000000\n", " 50513/1000000: episode: 73, duration: 7.933s, episode steps: 929, steps per second: 117, episode reward: 240.000, mean reward: 0.258 [0.000, 50.000], mean action: 7.982 [1.000, 8.000], mean observation: 72.764 [0.000, 228.000], loss: 2.371892, mean_absolute_error: 0.030984, acc: 0.995690, mean_q: 1.000000\n", " 51129/1000000: episode: 74, duration: 4.978s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.982 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.573659, mean_absolute_error: 0.015521, acc: 0.993496, mean_q: 1.000000\n", " 51760/1000000: episode: 75, duration: 5.522s, episode steps: 631, steps per second: 114, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 52391/1000000: episode: 76, duration: 5.124s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 53006/1000000: episode: 77, duration: 4.966s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 53634/1000000: episode: 78, duration: 5.038s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 54268/1000000: episode: 79, duration: 5.111s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555784, mean_absolute_error: 0.014590, acc: 0.995261, mean_q: 1.000000\n", " 54892/1000000: episode: 80, duration: 5.016s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000\n", " 55570/1000000: episode: 81, duration: 5.431s, episode steps: 678, steps per second: 125, episode reward: 100.000, mean reward: 0.147 [0.000, 10.000], mean action: 7.981 [1.000, 8.000], mean observation: 72.906 [0.000, 228.000], loss: 0.740788, mean_absolute_error: 0.018628, acc: 0.995569, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 56203/1000000: episode: 82, duration: 5.093s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.991 [3.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.556664, mean_absolute_error: 0.014611, acc: 0.995253, mean_q: 1.000000\n", " 56836/1000000: episode: 83, duration: 5.085s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 57452/1000000: episode: 84, duration: 4.944s, episode steps: 616, steps per second: 125, episode reward: 90.000, mean reward: 0.146 [0.000, 10.000], mean action: 7.987 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.734326, mean_absolute_error: 0.018591, acc: 0.995122, mean_q: 1.000000\n", " 58075/1000000: episode: 85, duration: 5.017s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 58703/1000000: episode: 86, duration: 5.039s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 59333/1000000: episode: 87, duration: 5.059s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559318, mean_absolute_error: 0.014676, acc: 0.995231, mean_q: 1.000000\n", " 60127/1000000: episode: 88, duration: 6.351s, episode steps: 794, steps per second: 125, episode reward: 240.000, mean reward: 0.302 [0.000, 50.000], mean action: 7.986 [1.000, 8.000], mean observation: 72.822 [0.000, 228.000], loss: 2.775674, mean_absolute_error: 0.036070, acc: 0.994956, mean_q: 1.000000\n", " 60755/1000000: episode: 89, duration: 5.030s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 61384/1000000: episode: 90, duration: 5.101s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 62394/1000000: episode: 91, duration: 8.084s, episode steps: 1010, steps per second: 125, episode reward: 840.000, mean reward: 0.832 [0.000, 400.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.831 [0.000, 228.000], loss: 101.281608, mean_absolute_error: 0.094000, acc: 0.998018, mean_q: 1.000000\n", " 63028/1000000: episode: 92, duration: 5.045s, episode steps: 634, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 63627/1000000: episode: 93, duration: 4.799s, episode steps: 599, steps per second: 125, episode reward: 130.000, mean reward: 0.217 [0.000, 10.000], mean action: 7.987 [2.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 1.088979, mean_absolute_error: 0.026505, acc: 0.994983, mean_q: 1.000000\n", " 64250/1000000: episode: 94, duration: 5.007s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.970 [2.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.568796, mean_absolute_error: 0.015890, acc: 0.991961, mean_q: 1.000000\n", " 64877/1000000: episode: 95, duration: 5.039s, episode steps: 627, steps per second: 124, episode reward: 110.000, mean reward: 0.175 [0.000, 10.000], mean action: 7.987 [1.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.880848, mean_absolute_error: 0.021826, acc: 0.995208, mean_q: 1.000000\n", " 65495/1000000: episode: 96, duration: 4.993s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.989 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.570196, mean_absolute_error: 0.014940, acc: 0.995138, mean_q: 1.000000\n", " 66127/1000000: episode: 97, duration: 5.072s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000\n", " 66757/1000000: episode: 98, duration: 5.076s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559318, mean_absolute_error: 0.014676, acc: 0.995231, mean_q: 1.000000\n", " 67386/1000000: episode: 99, duration: 5.056s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 68010/1000000: episode: 100, duration: 5.167s, episode steps: 624, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 68637/1000000: episode: 101, duration: 5.054s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.986 [3.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000\n", " 69270/1000000: episode: 102, duration: 5.067s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 69899/1000000: episode: 103, duration: 5.061s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 70523/1000000: episode: 104, duration: 5.003s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 71134/1000000: episode: 105, duration: 4.940s, episode steps: 611, steps per second: 124, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.575115, mean_absolute_error: 0.014557, acc: 0.996721, mean_q: 1.000000\n", " 71760/1000000: episode: 106, duration: 5.358s, episode steps: 626, steps per second: 117, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 72379/1000000: episode: 107, duration: 5.078s, episode steps: 619, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 73008/1000000: episode: 108, duration: 5.133s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 73642/1000000: episode: 109, duration: 5.542s, episode steps: 634, steps per second: 114, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 74276/1000000: episode: 110, duration: 5.082s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 74896/1000000: episode: 111, duration: 5.017s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 75523/1000000: episode: 112, duration: 5.050s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.984 [1.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000\n", " 76145/1000000: episode: 113, duration: 5.004s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566523, mean_absolute_error: 0.014851, acc: 0.995169, mean_q: 1.000000\n", " 76763/1000000: episode: 114, duration: 5.041s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.568591, mean_absolute_error: 0.014405, acc: 0.996759, mean_q: 1.000000\n", " 77393/1000000: episode: 115, duration: 5.421s, episode steps: 630, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 78024/1000000: episode: 116, duration: 5.238s, episode steps: 631, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 78657/1000000: episode: 117, duration: 5.127s, episode steps: 633, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 79658/1000000: episode: 118, duration: 8.076s, episode steps: 1001, steps per second: 124, episode reward: 240.000, mean reward: 0.240 [0.000, 50.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.757 [0.000, 228.000], loss: 2.199140, mean_absolute_error: 0.028173, acc: 0.998000, mean_q: 1.000000\n", " 80290/1000000: episode: 119, duration: 5.097s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 80908/1000000: episode: 120, duration: 4.965s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 81535/1000000: episode: 121, duration: 5.057s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 82163/1000000: episode: 122, duration: 5.498s, episode steps: 628, steps per second: 114, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.978 [0.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561102, mean_absolute_error: 0.014719, acc: 0.995215, mean_q: 1.000000\n", " 82789/1000000: episode: 123, duration: 5.050s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000\n", " 83384/1000000: episode: 124, duration: 4.804s, episode steps: 595, steps per second: 124, episode reward: 150.000, mean reward: 0.252 [0.000, 10.000], mean action: 7.988 [1.000, 8.000], mean observation: 72.829 [0.000, 228.000], loss: 1.262659, mean_absolute_error: 0.029854, acc: 0.996633, mean_q: 1.000000\n", " 84011/1000000: episode: 125, duration: 5.025s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 84632/1000000: episode: 126, duration: 5.008s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000\n", " 85437/1000000: episode: 127, duration: 6.496s, episode steps: 805, steps per second: 124, episode reward: 440.000, mean reward: 0.547 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.826 [0.000, 228.000], loss: 27.608373, mean_absolute_error: 0.062407, acc: 0.997512, mean_q: 1.000000\n", " 86065/1000000: episode: 128, duration: 5.578s, episode steps: 628, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 86691/1000000: episode: 129, duration: 5.067s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000\n", " 87314/1000000: episode: 130, duration: 5.019s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 87940/1000000: episode: 131, duration: 5.044s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 88574/1000000: episode: 132, duration: 5.099s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 89208/1000000: episode: 133, duration: 5.165s, episode steps: 634, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 89844/1000000: episode: 134, duration: 5.418s, episode steps: 636, steps per second: 117, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.569782, mean_absolute_error: 0.014551, acc: 0.995276, mean_q: 1.000000\n", " 90466/1000000: episode: 135, duration: 5.059s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 91096/1000000: episode: 136, duration: 5.037s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 91723/1000000: episode: 137, duration: 5.036s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 92359/1000000: episode: 138, duration: 5.092s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 92990/1000000: episode: 139, duration: 5.032s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 93619/1000000: episode: 140, duration: 5.026s, episode steps: 629, steps per second: 125, episode reward: 80.000, mean reward: 0.127 [0.000, 10.000], mean action: 7.989 [3.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.639668, mean_absolute_error: 0.016463, acc: 0.995223, mean_q: 1.000000\n", " 94239/1000000: episode: 141, duration: 4.976s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 94856/1000000: episode: 142, duration: 4.992s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 95489/1000000: episode: 143, duration: 5.062s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 96118/1000000: episode: 144, duration: 5.067s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 96742/1000000: episode: 145, duration: 5.001s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000\n", " 97372/1000000: episode: 146, duration: 5.077s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 98010/1000000: episode: 147, duration: 5.086s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 98630/1000000: episode: 148, duration: 4.953s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000\n", " 99262/1000000: episode: 149, duration: 5.040s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000\n", " 99886/1000000: episode: 150, duration: 5.027s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.982 [0.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564705, mean_absolute_error: 0.014806, acc: 0.995185, mean_q: 1.000000\n", " 100507/1000000: episode: 151, duration: 5.023s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000\n", " 101281/1000000: episode: 152, duration: 6.234s, episode steps: 774, steps per second: 124, episode reward: 240.000, mean reward: 0.310 [0.000, 50.000], mean action: 7.991 [1.000, 8.000], mean observation: 72.822 [0.000, 228.000], loss: 2.844927, mean_absolute_error: 0.036120, acc: 0.997413, mean_q: 1.000000\n", " 101909/1000000: episode: 153, duration: 5.073s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 102547/1000000: episode: 154, duration: 5.248s, episode steps: 638, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [5.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552295, mean_absolute_error: 0.014505, acc: 0.995290, mean_q: 1.000000\n", " 103176/1000000: episode: 155, duration: 5.442s, episode steps: 629, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 103790/1000000: episode: 156, duration: 4.944s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 104424/1000000: episode: 157, duration: 5.090s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 105061/1000000: episode: 158, duration: 5.155s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 105690/1000000: episode: 159, duration: 5.063s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 106325/1000000: episode: 160, duration: 5.105s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000\n", " 106957/1000000: episode: 161, duration: 5.073s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.975 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014284, acc: 0.995246, mean_q: 1.000000\n", " 107582/1000000: episode: 162, duration: 5.071s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 108213/1000000: episode: 163, duration: 5.382s, episode steps: 631, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 108848/1000000: episode: 164, duration: 5.602s, episode steps: 635, steps per second: 113, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000\n", " 109475/1000000: episode: 165, duration: 5.399s, episode steps: 627, steps per second: 116, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 110103/1000000: episode: 166, duration: 5.044s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 110724/1000000: episode: 167, duration: 4.978s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 111353/1000000: episode: 168, duration: 5.056s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 112417/1000000: episode: 169, duration: 8.556s, episode steps: 1064, steps per second: 124, episode reward: 840.000, mean reward: 0.789 [0.000, 400.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.831 [0.000, 228.000], loss: 96.137474, mean_absolute_error: 0.089591, acc: 0.997178, mean_q: 1.000000\n", " 113042/1000000: episode: 170, duration: 5.015s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 114392/1000000: episode: 171, duration: 10.825s, episode steps: 1350, steps per second: 125, episode reward: 240.000, mean reward: 0.178 [0.000, 50.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.824 [0.000, 228.000], loss: 1.630947, mean_absolute_error: 0.021417, acc: 0.997776, mean_q: 1.000000\n", " 115010/1000000: episode: 172, duration: 5.067s, episode steps: 618, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 115642/1000000: episode: 173, duration: 5.089s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 116268/1000000: episode: 174, duration: 5.063s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 117381/1000000: episode: 175, duration: 8.975s, episode steps: 1113, steps per second: 124, episode reward: 440.000, mean reward: 0.395 [0.000, 200.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.756 [0.000, 228.000], loss: 19.962354, mean_absolute_error: 0.045726, acc: 0.997302, mean_q: 1.000000\n", " 117997/1000000: episode: 176, duration: 4.968s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.979 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.572050, mean_absolute_error: 0.014985, acc: 0.995122, mean_q: 1.000000\n", " 118619/1000000: episode: 177, duration: 5.040s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 119240/1000000: episode: 178, duration: 5.007s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 119852/1000000: episode: 179, duration: 4.931s, episode steps: 612, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.972 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.575794, mean_absolute_error: 0.015256, acc: 0.993453, mean_q: 1.000000\n", " 120483/1000000: episode: 180, duration: 5.078s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 121106/1000000: episode: 181, duration: 5.026s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 121720/1000000: episode: 182, duration: 4.955s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.572301, mean_absolute_error: 0.014492, acc: 0.996737, mean_q: 1.000000\n", " 122356/1000000: episode: 183, duration: 5.142s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000\n", " 122989/1000000: episode: 184, duration: 5.098s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 123607/1000000: episode: 185, duration: 4.972s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.974 [2.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.573405, mean_absolute_error: 0.016009, acc: 0.991896, mean_q: 1.000000\n", " 124231/1000000: episode: 186, duration: 4.982s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.979 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567883, mean_absolute_error: 0.015866, acc: 0.991974, mean_q: 1.000000\n", " 124865/1000000: episode: 187, duration: 5.061s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 125494/1000000: episode: 188, duration: 5.025s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 126111/1000000: episode: 189, duration: 4.914s, episode steps: 617, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 126732/1000000: episode: 190, duration: 4.970s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 127353/1000000: episode: 191, duration: 4.976s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000\n", " 127972/1000000: episode: 192, duration: 4.972s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.982 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.569273, mean_absolute_error: 0.014917, acc: 0.995146, mean_q: 1.000000\n", " 128593/1000000: episode: 193, duration: 4.968s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 129226/1000000: episode: 194, duration: 5.157s, episode steps: 633, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 129841/1000000: episode: 195, duration: 4.964s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 130939/1000000: episode: 196, duration: 8.771s, episode steps: 1098, steps per second: 125, episode reward: 240.000, mean reward: 0.219 [0.000, 50.000], mean action: 7.986 [1.000, 8.000], mean observation: 72.774 [0.000, 228.000], loss: 2.006495, mean_absolute_error: 0.026382, acc: 0.996354, mean_q: 1.000000\n", " 132000/1000000: episode: 197, duration: 8.742s, episode steps: 1061, steps per second: 121, episode reward: 840.000, mean reward: 0.792 [0.000, 400.000], mean action: 7.991 [0.000, 8.000], mean observation: 72.836 [0.000, 228.000], loss: 96.409561, mean_absolute_error: 0.089842, acc: 0.997170, mean_q: 1.000000\n", " 132625/1000000: episode: 198, duration: 5.386s, episode steps: 625, steps per second: 116, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 133256/1000000: episode: 199, duration: 5.362s, episode steps: 631, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 133887/1000000: episode: 200, duration: 5.119s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 134530/1000000: episode: 201, duration: 5.429s, episode steps: 643, steps per second: 118, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000\n", " 135165/1000000: episode: 202, duration: 5.239s, episode steps: 635, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 135790/1000000: episode: 203, duration: 5.083s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 136406/1000000: episode: 204, duration: 4.993s, episode steps: 616, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 137023/1000000: episode: 205, duration: 4.977s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 137645/1000000: episode: 206, duration: 5.079s, episode steps: 622, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.984 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566523, mean_absolute_error: 0.014851, acc: 0.995169, mean_q: 1.000000\n", " 138277/1000000: episode: 207, duration: 5.149s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000\n", " 138906/1000000: episode: 208, duration: 5.186s, episode steps: 629, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 139516/1000000: episode: 209, duration: 4.976s, episode steps: 610, steps per second: 123, episode reward: 90.000, mean reward: 0.148 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.739935, mean_absolute_error: 0.018221, acc: 0.996716, mean_q: 1.000000\n", " 140147/1000000: episode: 210, duration: 5.084s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000\n", " 140777/1000000: episode: 211, duration: 5.093s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 141392/1000000: episode: 212, duration: 4.945s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.571369, mean_absolute_error: 0.014470, acc: 0.996743, mean_q: 1.000000\n", " 142024/1000000: episode: 213, duration: 5.236s, episode steps: 632, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 142656/1000000: episode: 214, duration: 5.340s, episode steps: 632, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 143290/1000000: episode: 215, duration: 5.075s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.989 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555784, mean_absolute_error: 0.014590, acc: 0.995261, mean_q: 1.000000\n", " 143917/1000000: episode: 216, duration: 5.041s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 144551/1000000: episode: 217, duration: 5.366s, episode steps: 634, steps per second: 118, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 145178/1000000: episode: 218, duration: 5.035s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 145814/1000000: episode: 219, duration: 5.123s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000\n", " 146450/1000000: episode: 220, duration: 5.547s, episode steps: 636, steps per second: 115, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 147069/1000000: episode: 221, duration: 5.003s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 147701/1000000: episode: 222, duration: 5.100s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 148326/1000000: episode: 223, duration: 5.040s, episode steps: 625, steps per second: 124, episode reward: 110.000, mean reward: 0.176 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.882085, mean_absolute_error: 0.021364, acc: 0.996795, mean_q: 1.000000\n", " 148951/1000000: episode: 224, duration: 5.413s, episode steps: 625, steps per second: 115, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 149580/1000000: episode: 225, duration: 5.214s, episode steps: 629, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 150212/1000000: episode: 226, duration: 5.042s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [4.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557546, mean_absolute_error: 0.014633, acc: 0.995246, mean_q: 1.000000\n", " 150840/1000000: episode: 227, duration: 5.027s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 151461/1000000: episode: 228, duration: 4.970s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 152083/1000000: episode: 229, duration: 4.969s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [5.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.566523, mean_absolute_error: 0.014851, acc: 0.995169, mean_q: 1.000000\n", " 152707/1000000: episode: 230, duration: 4.987s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 153335/1000000: episode: 231, duration: 5.008s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 153962/1000000: episode: 232, duration: 4.987s, episode steps: 627, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 154591/1000000: episode: 233, duration: 5.009s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 155211/1000000: episode: 234, duration: 4.957s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 155839/1000000: episode: 235, duration: 5.117s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 156483/1000000: episode: 236, duration: 5.255s, episode steps: 644, steps per second: 123, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.545602, mean_absolute_error: 0.013867, acc: 0.996890, mean_q: 1.000000\n", " 157111/1000000: episode: 237, duration: 5.084s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 157740/1000000: episode: 238, duration: 5.172s, episode steps: 629, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 158377/1000000: episode: 239, duration: 5.128s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 159009/1000000: episode: 240, duration: 5.082s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.978 [0.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557546, mean_absolute_error: 0.014633, acc: 0.995246, mean_q: 1.000000\n", " 159622/1000000: episode: 241, duration: 4.967s, episode steps: 613, steps per second: 123, episode reward: 100.000, mean reward: 0.163 [0.000, 10.000], mean action: 7.992 [5.000, 8.000], mean observation: 72.897 [0.000, 228.000], loss: 0.819461, mean_absolute_error: 0.020488, acc: 0.995098, mean_q: 1.000000\n", " 160254/1000000: episode: 242, duration: 5.505s, episode steps: 632, steps per second: 115, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 160896/1000000: episode: 243, duration: 5.143s, episode steps: 642, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.988 [4.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.548848, mean_absolute_error: 0.014422, acc: 0.995320, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 161535/1000000: episode: 244, duration: 5.120s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 162174/1000000: episode: 245, duration: 5.199s, episode steps: 639, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.549877, mean_absolute_error: 0.013967, acc: 0.996865, mean_q: 1.000000\n", " 162814/1000000: episode: 246, duration: 5.545s, episode steps: 640, steps per second: 115, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 163444/1000000: episode: 247, duration: 5.089s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 164064/1000000: episode: 248, duration: 5.001s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 164695/1000000: episode: 249, duration: 5.401s, episode steps: 631, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.983 [2.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000\n", " 165722/1000000: episode: 250, duration: 8.493s, episode steps: 1027, steps per second: 121, episode reward: 240.000, mean reward: 0.234 [0.000, 50.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.760 [0.000, 228.000], loss: 2.143412, mean_absolute_error: 0.027488, acc: 0.998051, mean_q: 1.000000\n", " 166352/1000000: episode: 251, duration: 5.105s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 166980/1000000: episode: 252, duration: 5.066s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 167610/1000000: episode: 253, duration: 5.072s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 168238/1000000: episode: 254, duration: 5.542s, episode steps: 628, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 169548/1000000: episode: 255, duration: 10.496s, episode steps: 1310, steps per second: 125, episode reward: 240.000, mean reward: 0.183 [0.000, 50.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.820 [0.000, 228.000], loss: 1.680783, mean_absolute_error: 0.022037, acc: 0.997708, mean_q: 1.000000\n", " 170163/1000000: episode: 256, duration: 4.966s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 171066/1000000: episode: 257, duration: 7.415s, episode steps: 903, steps per second: 122, episode reward: 440.000, mean reward: 0.487 [0.000, 200.000], mean action: 7.992 [4.000, 8.000], mean observation: 72.805 [0.000, 228.000], loss: 24.610994, mean_absolute_error: 0.056479, acc: 0.995565, mean_q: 1.000000\n", " 171702/1000000: episode: 258, duration: 5.216s, episode steps: 636, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 172315/1000000: episode: 259, duration: 5.027s, episode steps: 613, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.571619, mean_absolute_error: 0.013974, acc: 0.998366, mean_q: 1.000000\n", " 172947/1000000: episode: 260, duration: 5.078s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000\n", " 173572/1000000: episode: 261, duration: 5.057s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 174202/1000000: episode: 262, duration: 5.199s, episode steps: 630, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 174827/1000000: episode: 263, duration: 5.225s, episode steps: 625, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 175455/1000000: episode: 264, duration: 5.327s, episode steps: 628, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 176073/1000000: episode: 265, duration: 5.043s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 176697/1000000: episode: 266, duration: 5.463s, episode steps: 624, steps per second: 114, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 177327/1000000: episode: 267, duration: 5.070s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 177951/1000000: episode: 268, duration: 5.337s, episode steps: 624, steps per second: 117, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000\n", " 178576/1000000: episode: 269, duration: 5.124s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 179223/1000000: episode: 270, duration: 5.238s, episode steps: 647, steps per second: 124, episode reward: 70.000, mean reward: 0.108 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.541536, mean_absolute_error: 0.013297, acc: 0.998452, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 179841/1000000: episode: 271, duration: 5.042s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.568591, mean_absolute_error: 0.014405, acc: 0.996759, mean_q: 1.000000\n", " 180471/1000000: episode: 272, duration: 5.093s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 181092/1000000: episode: 273, duration: 5.049s, episode steps: 621, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 181718/1000000: episode: 274, duration: 5.399s, episode steps: 626, steps per second: 116, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 182340/1000000: episode: 275, duration: 5.495s, episode steps: 622, steps per second: 113, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 182968/1000000: episode: 276, duration: 5.035s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 183595/1000000: episode: 277, duration: 5.329s, episode steps: 627, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.978 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561999, mean_absolute_error: 0.014741, acc: 0.995208, mean_q: 1.000000\n", " 184226/1000000: episode: 278, duration: 5.953s, episode steps: 631, steps per second: 106, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 184851/1000000: episode: 279, duration: 5.952s, episode steps: 625, steps per second: 105, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [4.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.563800, mean_absolute_error: 0.014785, acc: 0.995192, mean_q: 1.000000\n", " 185479/1000000: episode: 280, duration: 5.378s, episode steps: 628, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 186335/1000000: episode: 281, duration: 7.031s, episode steps: 856, steps per second: 122, episode reward: 840.000, mean reward: 0.981 [0.000, 400.000], mean action: 7.988 [1.000, 8.000], mean observation: 72.803 [0.000, 228.000], loss: 119.525292, mean_absolute_error: 0.111116, acc: 0.996491, mean_q: 1.000000\n", " 186957/1000000: episode: 282, duration: 5.255s, episode steps: 622, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 187585/1000000: episode: 283, duration: 5.133s, episode steps: 628, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 188212/1000000: episode: 284, duration: 5.294s, episode steps: 627, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 188844/1000000: episode: 285, duration: 5.435s, episode steps: 632, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000\n", " 189475/1000000: episode: 286, duration: 5.136s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 190097/1000000: episode: 287, duration: 5.067s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.881 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 190729/1000000: episode: 288, duration: 5.240s, episode steps: 632, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 191360/1000000: episode: 289, duration: 5.093s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 191982/1000000: episode: 290, duration: 5.022s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 192614/1000000: episode: 291, duration: 5.366s, episode steps: 632, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 193233/1000000: episode: 292, duration: 5.191s, episode steps: 619, steps per second: 119, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 193849/1000000: episode: 293, duration: 4.960s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 194470/1000000: episode: 294, duration: 5.017s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 195103/1000000: episode: 295, duration: 5.076s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 196054/1000000: episode: 296, duration: 7.657s, episode steps: 951, steps per second: 124, episode reward: 840.000, mean reward: 0.883 [0.000, 400.000], mean action: 7.989 [0.000, 8.000], mean observation: 72.832 [0.000, 228.000], loss: 107.572768, mean_absolute_error: 0.100116, acc: 0.996842, mean_q: 1.000000\n", " 196682/1000000: episode: 297, duration: 5.044s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 197324/1000000: episode: 298, duration: 5.166s, episode steps: 642, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000\n", " 197959/1000000: episode: 299, duration: 5.185s, episode steps: 635, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 198574/1000000: episode: 300, duration: 4.956s, episode steps: 615, steps per second: 124, episode reward: 80.000, mean reward: 0.130 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.901 [0.000, 228.000], loss: 0.652639, mean_absolute_error: 0.016276, acc: 0.996743, mean_q: 1.000000\n", " 199195/1000000: episode: 301, duration: 4.992s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 199841/1000000: episode: 302, duration: 5.200s, episode steps: 646, steps per second: 124, episode reward: 70.000, mean reward: 0.108 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.542375, mean_absolute_error: 0.013316, acc: 0.998450, mean_q: 1.000000\n", " 200458/1000000: episode: 303, duration: 4.978s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 201079/1000000: episode: 304, duration: 5.025s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 201707/1000000: episode: 305, duration: 5.038s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 202340/1000000: episode: 306, duration: 5.087s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 202979/1000000: episode: 307, duration: 5.193s, episode steps: 639, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.549877, mean_absolute_error: 0.013967, acc: 0.996865, mean_q: 1.000000\n", " 203610/1000000: episode: 308, duration: 5.070s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 204248/1000000: episode: 309, duration: 5.123s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 204871/1000000: episode: 310, duration: 5.003s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 205504/1000000: episode: 311, duration: 5.076s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 206128/1000000: episode: 312, duration: 5.025s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000\n", " 206761/1000000: episode: 313, duration: 5.112s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 207374/1000000: episode: 314, duration: 4.939s, episode steps: 613, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.573236, mean_absolute_error: 0.014513, acc: 0.996732, mean_q: 1.000000\n", " 208003/1000000: episode: 315, duration: 5.061s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 208635/1000000: episode: 316, duration: 5.066s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000\n", " 209257/1000000: episode: 317, duration: 4.981s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 209884/1000000: episode: 318, duration: 5.016s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 210511/1000000: episode: 319, duration: 5.018s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 211128/1000000: episode: 320, duration: 4.947s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 211996/1000000: episode: 321, duration: 7.001s, episode steps: 868, steps per second: 124, episode reward: 440.000, mean reward: 0.507 [0.000, 200.000], mean action: 7.992 [1.000, 8.000], mean observation: 72.796 [0.000, 228.000], loss: 25.602232, mean_absolute_error: 0.057953, acc: 0.997693, mean_q: 1.000000\n", " 212619/1000000: episode: 322, duration: 4.981s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 213241/1000000: episode: 323, duration: 4.954s, episode steps: 622, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 213861/1000000: episode: 324, duration: 4.952s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 214472/1000000: episode: 325, duration: 4.930s, episode steps: 611, steps per second: 124, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.575115, mean_absolute_error: 0.014557, acc: 0.996721, mean_q: 1.000000\n", " 215094/1000000: episode: 326, duration: 5.080s, episode steps: 622, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 215731/1000000: episode: 327, duration: 5.299s, episode steps: 637, steps per second: 120, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 216368/1000000: episode: 328, duration: 5.182s, episode steps: 637, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000\n", " 216999/1000000: episode: 329, duration: 5.186s, episode steps: 631, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [5.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000\n", " 217642/1000000: episode: 330, duration: 5.200s, episode steps: 643, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000\n", " 218267/1000000: episode: 331, duration: 5.065s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 218885/1000000: episode: 332, duration: 4.982s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 219517/1000000: episode: 333, duration: 5.194s, episode steps: 632, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 220146/1000000: episode: 334, duration: 5.573s, episode steps: 629, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 220783/1000000: episode: 335, duration: 5.128s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 221398/1000000: episode: 336, duration: 4.969s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 222022/1000000: episode: 337, duration: 5.033s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 222667/1000000: episode: 338, duration: 5.188s, episode steps: 645, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.544755, mean_absolute_error: 0.013847, acc: 0.996894, mean_q: 1.000000\n", " 223286/1000000: episode: 339, duration: 4.974s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 223909/1000000: episode: 340, duration: 5.055s, episode steps: 623, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000\n", " 224535/1000000: episode: 341, duration: 5.567s, episode steps: 626, steps per second: 112, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 225161/1000000: episode: 342, duration: 5.067s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 225792/1000000: episode: 343, duration: 5.105s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 226425/1000000: episode: 344, duration: 5.111s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 227049/1000000: episode: 345, duration: 5.057s, episode steps: 624, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 227664/1000000: episode: 346, duration: 5.398s, episode steps: 615, steps per second: 114, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.571369, mean_absolute_error: 0.014470, acc: 0.996743, mean_q: 1.000000\n", " 228290/1000000: episode: 347, duration: 5.051s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 228919/1000000: episode: 348, duration: 5.061s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 229539/1000000: episode: 349, duration: 5.053s, episode steps: 620, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 230176/1000000: episode: 350, duration: 5.228s, episode steps: 637, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 230803/1000000: episode: 351, duration: 5.228s, episode steps: 627, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 231431/1000000: episode: 352, duration: 5.101s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 232075/1000000: episode: 353, duration: 5.187s, episode steps: 644, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.544062, mean_absolute_error: 0.013354, acc: 0.998445, mean_q: 1.000000\n", " 232705/1000000: episode: 354, duration: 5.275s, episode steps: 630, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 233340/1000000: episode: 355, duration: 5.577s, episode steps: 635, steps per second: 114, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 233976/1000000: episode: 356, duration: 5.112s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 234609/1000000: episode: 357, duration: 5.084s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 235243/1000000: episode: 358, duration: 5.096s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 235880/1000000: episode: 359, duration: 5.086s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000\n", " 236495/1000000: episode: 360, duration: 4.940s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 237117/1000000: episode: 361, duration: 4.982s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 237742/1000000: episode: 362, duration: 5.050s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 238384/1000000: episode: 363, duration: 5.150s, episode steps: 642, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000\n", " 239018/1000000: episode: 364, duration: 5.270s, episode steps: 634, steps per second: 120, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 239635/1000000: episode: 365, duration: 5.092s, episode steps: 617, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 240261/1000000: episode: 366, duration: 5.055s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 240897/1000000: episode: 367, duration: 5.117s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 241527/1000000: episode: 368, duration: 5.082s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 242157/1000000: episode: 369, duration: 5.149s, episode steps: 630, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 242779/1000000: episode: 370, duration: 5.526s, episode steps: 622, steps per second: 113, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 243415/1000000: episode: 371, duration: 5.129s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000\n", " 244045/1000000: episode: 372, duration: 5.043s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 244678/1000000: episode: 373, duration: 5.074s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 245311/1000000: episode: 374, duration: 5.088s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 245946/1000000: episode: 375, duration: 5.082s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 246569/1000000: episode: 376, duration: 5.018s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 247194/1000000: episode: 377, duration: 5.009s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 247806/1000000: episode: 378, duration: 5.016s, episode steps: 612, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 248430/1000000: episode: 379, duration: 5.024s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 249069/1000000: episode: 380, duration: 5.133s, episode steps: 639, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 249704/1000000: episode: 381, duration: 5.091s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 250352/1000000: episode: 382, duration: 5.635s, episode steps: 648, steps per second: 115, episode reward: 70.000, mean reward: 0.108 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.540699, mean_absolute_error: 0.013278, acc: 0.998454, mean_q: 1.000000\n", " 250984/1000000: episode: 383, duration: 5.125s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 251624/1000000: episode: 384, duration: 5.163s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 252255/1000000: episode: 385, duration: 5.113s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 252875/1000000: episode: 386, duration: 5.494s, episode steps: 620, steps per second: 113, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 253494/1000000: episode: 387, duration: 5.039s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 254124/1000000: episode: 388, duration: 5.073s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 254747/1000000: episode: 389, duration: 5.040s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 255365/1000000: episode: 390, duration: 4.992s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 256003/1000000: episode: 391, duration: 5.739s, episode steps: 638, steps per second: 111, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550741, mean_absolute_error: 0.013987, acc: 0.996860, mean_q: 1.000000\n", " 256626/1000000: episode: 392, duration: 5.036s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 257246/1000000: episode: 393, duration: 5.040s, episode steps: 620, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 257875/1000000: episode: 394, duration: 5.057s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 258489/1000000: episode: 395, duration: 4.983s, episode steps: 614, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 259121/1000000: episode: 396, duration: 5.088s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 259750/1000000: episode: 397, duration: 5.084s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 260599/1000000: episode: 398, duration: 7.239s, episode steps: 849, steps per second: 117, episode reward: 440.000, mean reward: 0.518 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.798 [0.000, 228.000], loss: 26.175866, mean_absolute_error: 0.059227, acc: 0.997642, mean_q: 1.000000\n", " 261235/1000000: episode: 399, duration: 5.146s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 261864/1000000: episode: 400, duration: 5.100s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 262494/1000000: episode: 401, duration: 5.576s, episode steps: 630, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 263123/1000000: episode: 402, duration: 5.129s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 263747/1000000: episode: 403, duration: 5.065s, episode steps: 624, steps per second: 123, episode reward: 110.000, mean reward: 0.176 [0.000, 10.000], mean action: 7.968 [0.000, 8.000], mean observation: 72.902 [0.000, 228.000], loss: 0.886679, mean_absolute_error: 0.022456, acc: 0.993579, mean_q: 1.000000\n", " 264377/1000000: episode: 404, duration: 5.337s, episode steps: 630, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 265002/1000000: episode: 405, duration: 5.074s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 265628/1000000: episode: 406, duration: 5.087s, episode steps: 626, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 266261/1000000: episode: 407, duration: 5.452s, episode steps: 633, steps per second: 116, episode reward: 160.000, mean reward: 0.253 [0.000, 10.000], mean action: 7.986 [3.000, 8.000], mean observation: 72.859 [0.000, 228.000], loss: 1.267265, mean_absolute_error: 0.030403, acc: 0.995253, mean_q: 1.000000\n", " 266897/1000000: episode: 408, duration: 5.177s, episode steps: 636, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 267525/1000000: episode: 409, duration: 5.062s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 268148/1000000: episode: 410, duration: 5.013s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 268777/1000000: episode: 411, duration: 5.082s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 269418/1000000: episode: 412, duration: 5.161s, episode steps: 641, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 270044/1000000: episode: 413, duration: 5.400s, episode steps: 626, steps per second: 116, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 270669/1000000: episode: 414, duration: 5.024s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 271293/1000000: episode: 415, duration: 5.059s, episode steps: 624, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000\n", " 271921/1000000: episode: 416, duration: 5.031s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 272549/1000000: episode: 417, duration: 5.034s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 273176/1000000: episode: 418, duration: 5.001s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 273810/1000000: episode: 419, duration: 5.068s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 274448/1000000: episode: 420, duration: 5.108s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.550741, mean_absolute_error: 0.013987, acc: 0.996860, mean_q: 1.000000\n", " 275081/1000000: episode: 421, duration: 5.060s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 275713/1000000: episode: 422, duration: 5.039s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 276341/1000000: episode: 423, duration: 5.010s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 276962/1000000: episode: 424, duration: 4.966s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 277586/1000000: episode: 425, duration: 5.014s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 278206/1000000: episode: 426, duration: 5.003s, episode steps: 620, steps per second: 124, episode reward: 110.000, mean reward: 0.177 [0.000, 10.000], mean action: 7.977 [1.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.892408, mean_absolute_error: 0.022594, acc: 0.993538, mean_q: 1.000000\n", " 278825/1000000: episode: 427, duration: 4.996s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 279443/1000000: episode: 428, duration: 4.969s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 280072/1000000: episode: 429, duration: 5.550s, episode steps: 629, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.979 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560209, mean_absolute_error: 0.014697, acc: 0.995223, mean_q: 1.000000\n", " 280693/1000000: episode: 430, duration: 5.040s, episode steps: 621, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 281315/1000000: episode: 431, duration: 5.010s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 281953/1000000: episode: 432, duration: 5.151s, episode steps: 638, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 282571/1000000: episode: 433, duration: 4.977s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 283187/1000000: episode: 434, duration: 5.054s, episode steps: 616, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 283806/1000000: episode: 435, duration: 5.227s, episode steps: 619, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 284440/1000000: episode: 436, duration: 5.344s, episode steps: 634, steps per second: 119, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 285066/1000000: episode: 437, duration: 5.237s, episode steps: 626, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 285690/1000000: episode: 438, duration: 5.068s, episode steps: 624, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 286314/1000000: episode: 439, duration: 5.131s, episode steps: 624, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 286937/1000000: episode: 440, duration: 5.286s, episode steps: 623, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 287766/1000000: episode: 441, duration: 6.812s, episode steps: 829, steps per second: 122, episode reward: 440.000, mean reward: 0.531 [0.000, 200.000], mean action: 7.983 [1.000, 8.000], mean observation: 72.811 [0.000, 228.000], loss: 26.809328, mean_absolute_error: 0.061029, acc: 0.996377, mean_q: 1.000000\n", " 288389/1000000: episode: 442, duration: 4.997s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 289021/1000000: episode: 443, duration: 5.151s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 289636/1000000: episode: 444, duration: 4.987s, episode steps: 615, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 290263/1000000: episode: 445, duration: 5.076s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 290897/1000000: episode: 446, duration: 5.146s, episode steps: 634, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 291571/1000000: episode: 447, duration: 5.509s, episode steps: 674, steps per second: 122, episode reward: 100.000, mean reward: 0.148 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.902 [0.000, 228.000], loss: 0.743720, mean_absolute_error: 0.018242, acc: 0.997028, mean_q: 1.000000\n", " 292204/1000000: episode: 448, duration: 5.101s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 292816/1000000: episode: 449, duration: 5.003s, episode steps: 612, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000\n", " 293444/1000000: episode: 450, duration: 5.056s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 294068/1000000: episode: 451, duration: 5.020s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 294690/1000000: episode: 452, duration: 5.034s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 295315/1000000: episode: 453, duration: 5.032s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 295938/1000000: episode: 454, duration: 5.026s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 296569/1000000: episode: 455, duration: 5.189s, episode steps: 631, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 297201/1000000: episode: 456, duration: 5.109s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 297832/1000000: episode: 457, duration: 5.092s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 298459/1000000: episode: 458, duration: 5.030s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 299079/1000000: episode: 459, duration: 4.963s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 299704/1000000: episode: 460, duration: 5.127s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 300351/1000000: episode: 461, duration: 5.223s, episode steps: 647, steps per second: 124, episode reward: 70.000, mean reward: 0.108 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.541536, mean_absolute_error: 0.013297, acc: 0.998452, mean_q: 1.000000\n", " 300930/1000000: episode: 462, duration: 4.676s, episode steps: 579, steps per second: 124, episode reward: 160.000, mean reward: 0.276 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.815 [0.000, 228.000], loss: 1.383942, mean_absolute_error: 0.032568, acc: 0.996540, mean_q: 1.000000\n", " 301552/1000000: episode: 463, duration: 5.009s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 302175/1000000: episode: 464, duration: 5.008s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 302800/1000000: episode: 465, duration: 5.454s, episode steps: 625, steps per second: 115, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 303439/1000000: episode: 466, duration: 5.279s, episode steps: 639, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 304062/1000000: episode: 467, duration: 5.113s, episode steps: 623, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 304702/1000000: episode: 468, duration: 5.267s, episode steps: 640, steps per second: 122, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 305327/1000000: episode: 469, duration: 5.089s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 305956/1000000: episode: 470, duration: 5.124s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 306567/1000000: episode: 471, duration: 4.988s, episode steps: 611, steps per second: 123, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.573493, mean_absolute_error: 0.014016, acc: 0.998361, mean_q: 1.000000\n", " 307195/1000000: episode: 472, duration: 5.196s, episode steps: 628, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 307830/1000000: episode: 473, duration: 5.086s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 308460/1000000: episode: 474, duration: 5.046s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 309090/1000000: episode: 475, duration: 5.054s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 309723/1000000: episode: 476, duration: 5.068s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 310348/1000000: episode: 477, duration: 5.016s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 310972/1000000: episode: 478, duration: 5.014s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 311604/1000000: episode: 479, duration: 5.109s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 312225/1000000: episode: 480, duration: 5.045s, episode steps: 621, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 312846/1000000: episode: 481, duration: 5.099s, episode steps: 621, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 313470/1000000: episode: 482, duration: 5.098s, episode steps: 624, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 314097/1000000: episode: 483, duration: 5.119s, episode steps: 627, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 314727/1000000: episode: 484, duration: 5.103s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 315347/1000000: episode: 485, duration: 4.983s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 315980/1000000: episode: 486, duration: 5.123s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 316601/1000000: episode: 487, duration: 5.069s, episode steps: 621, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 317238/1000000: episode: 488, duration: 5.141s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 317863/1000000: episode: 489, duration: 5.097s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 318490/1000000: episode: 490, duration: 5.051s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 319115/1000000: episode: 491, duration: 5.017s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 319743/1000000: episode: 492, duration: 5.080s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 320365/1000000: episode: 493, duration: 5.000s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 320994/1000000: episode: 494, duration: 5.039s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 321621/1000000: episode: 495, duration: 5.010s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 322249/1000000: episode: 496, duration: 5.010s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 322874/1000000: episode: 497, duration: 4.998s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 323505/1000000: episode: 498, duration: 5.044s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 324127/1000000: episode: 499, duration: 4.982s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 324761/1000000: episode: 500, duration: 5.074s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 325394/1000000: episode: 501, duration: 5.064s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 326025/1000000: episode: 502, duration: 5.075s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 326648/1000000: episode: 503, duration: 4.995s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 327269/1000000: episode: 504, duration: 4.964s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 327899/1000000: episode: 505, duration: 5.015s, episode steps: 630, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 328540/1000000: episode: 506, duration: 5.141s, episode steps: 641, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 329170/1000000: episode: 507, duration: 5.214s, episode steps: 630, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 329802/1000000: episode: 508, duration: 5.098s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000\n", " 330430/1000000: episode: 509, duration: 5.058s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 331055/1000000: episode: 510, duration: 5.052s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 331690/1000000: episode: 511, duration: 5.087s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 332327/1000000: episode: 512, duration: 5.087s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 332955/1000000: episode: 513, duration: 5.039s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 333574/1000000: episode: 514, duration: 4.980s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 334204/1000000: episode: 515, duration: 5.081s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 334836/1000000: episode: 516, duration: 5.088s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 335461/1000000: episode: 517, duration: 5.001s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 336082/1000000: episode: 518, duration: 4.993s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 336714/1000000: episode: 519, duration: 5.087s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 337342/1000000: episode: 520, duration: 5.039s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 337968/1000000: episode: 521, duration: 5.030s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 338598/1000000: episode: 522, duration: 5.135s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 339231/1000000: episode: 523, duration: 5.087s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 339855/1000000: episode: 524, duration: 4.992s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [4.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.580756, mean_absolute_error: 0.014810, acc: 0.995185, mean_q: 1.000000\n", " 340481/1000000: episode: 525, duration: 5.063s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 341112/1000000: episode: 526, duration: 5.080s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 341751/1000000: episode: 527, duration: 5.150s, episode steps: 639, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 342382/1000000: episode: 528, duration: 5.081s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 343001/1000000: episode: 529, duration: 4.985s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 343632/1000000: episode: 530, duration: 5.079s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 344256/1000000: episode: 531, duration: 5.052s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 344887/1000000: episode: 532, duration: 5.119s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 345502/1000000: episode: 533, duration: 5.064s, episode steps: 615, steps per second: 121, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 346122/1000000: episode: 534, duration: 5.015s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 346977/1000000: episode: 535, duration: 6.916s, episode steps: 855, steps per second: 124, episode reward: 440.000, mean reward: 0.515 [0.000, 200.000], mean action: 7.992 [1.000, 8.000], mean observation: 72.791 [0.000, 228.000], loss: 25.991961, mean_absolute_error: 0.058819, acc: 0.997658, mean_q: 1.000000\n", " 347614/1000000: episode: 536, duration: 5.115s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000\n", " 348239/1000000: episode: 537, duration: 5.053s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 348865/1000000: episode: 538, duration: 5.234s, episode steps: 626, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.982 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000\n", " 349491/1000000: episode: 539, duration: 5.049s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 350119/1000000: episode: 540, duration: 5.128s, episode steps: 628, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 350746/1000000: episode: 541, duration: 5.324s, episode steps: 627, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 351379/1000000: episode: 542, duration: 5.089s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 352011/1000000: episode: 543, duration: 5.302s, episode steps: 632, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 352639/1000000: episode: 544, duration: 5.489s, episode steps: 628, steps per second: 114, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 353273/1000000: episode: 545, duration: 5.370s, episode steps: 634, steps per second: 118, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 353905/1000000: episode: 546, duration: 5.715s, episode steps: 632, steps per second: 111, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 354533/1000000: episode: 547, duration: 5.850s, episode steps: 628, steps per second: 107, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 355158/1000000: episode: 548, duration: 5.628s, episode steps: 625, steps per second: 111, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 355784/1000000: episode: 549, duration: 5.390s, episode steps: 626, steps per second: 116, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 356407/1000000: episode: 550, duration: 5.117s, episode steps: 623, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000\n", " 357036/1000000: episode: 551, duration: 5.322s, episode steps: 629, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 357670/1000000: episode: 552, duration: 5.303s, episode steps: 634, steps per second: 120, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 358290/1000000: episode: 553, duration: 5.034s, episode steps: 620, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 358917/1000000: episode: 554, duration: 5.155s, episode steps: 627, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 359554/1000000: episode: 555, duration: 5.177s, episode steps: 637, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 360173/1000000: episode: 556, duration: 5.038s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 360794/1000000: episode: 557, duration: 5.209s, episode steps: 621, steps per second: 119, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 361414/1000000: episode: 558, duration: 5.088s, episode steps: 620, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 362045/1000000: episode: 559, duration: 5.248s, episode steps: 631, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 362667/1000000: episode: 560, duration: 5.265s, episode steps: 622, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 363297/1000000: episode: 561, duration: 5.137s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 363927/1000000: episode: 562, duration: 5.174s, episode steps: 630, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 364555/1000000: episode: 563, duration: 5.050s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 365195/1000000: episode: 564, duration: 5.211s, episode steps: 640, steps per second: 123, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 366341/1000000: episode: 565, duration: 9.548s, episode steps: 1146, steps per second: 120, episode reward: 1640.000, mean reward: 1.431 [0.000, 800.000], mean action: 7.994 [1.000, 8.000], mean observation: 72.846 [0.000, 228.000], loss: 368.720654, mean_absolute_error: 0.160597, acc: 0.998253, mean_q: 1.000000\n", " 366964/1000000: episode: 566, duration: 5.001s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 367594/1000000: episode: 567, duration: 5.436s, episode steps: 630, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 368225/1000000: episode: 568, duration: 5.448s, episode steps: 631, steps per second: 116, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 368850/1000000: episode: 569, duration: 5.271s, episode steps: 625, steps per second: 119, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 369472/1000000: episode: 570, duration: 5.233s, episode steps: 622, steps per second: 119, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 370308/1000000: episode: 571, duration: 6.877s, episode steps: 836, steps per second: 122, episode reward: 840.000, mean reward: 1.005 [0.000, 400.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.809 [0.000, 228.000], loss: 122.386986, mean_absolute_error: 0.113356, acc: 0.997605, mean_q: 1.000000\n", " 370933/1000000: episode: 572, duration: 5.348s, episode steps: 625, steps per second: 117, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 371553/1000000: episode: 573, duration: 4.992s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 372502/1000000: episode: 574, duration: 8.149s, episode steps: 949, steps per second: 116, episode reward: 840.000, mean reward: 0.885 [0.000, 400.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.806 [0.000, 228.000], loss: 107.798670, mean_absolute_error: 0.099977, acc: 0.997890, mean_q: 1.000000\n", " 373143/1000000: episode: 575, duration: 8.422s, episode steps: 641, steps per second: 76, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.988 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.548159, mean_absolute_error: 0.013927, acc: 0.996875, mean_q: 1.000000\n", " 373778/1000000: episode: 576, duration: 5.268s, episode steps: 635, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 374397/1000000: episode: 577, duration: 5.106s, episode steps: 619, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 375033/1000000: episode: 578, duration: 5.828s, episode steps: 636, steps per second: 109, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 375668/1000000: episode: 579, duration: 5.309s, episode steps: 635, steps per second: 120, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 376286/1000000: episode: 580, duration: 5.011s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 376909/1000000: episode: 581, duration: 5.000s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 377534/1000000: episode: 582, duration: 5.026s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 378178/1000000: episode: 583, duration: 5.163s, episode steps: 644, steps per second: 125, episode reward: 100.000, mean reward: 0.155 [0.000, 10.000], mean action: 7.995 [6.000, 8.000], mean observation: 72.900 [0.000, 228.000], loss: 0.779956, mean_absolute_error: 0.019554, acc: 0.995334, mean_q: 1.000000\n", " 378800/1000000: episode: 584, duration: 5.003s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 379431/1000000: episode: 585, duration: 5.141s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 380060/1000000: episode: 586, duration: 5.310s, episode steps: 629, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 380684/1000000: episode: 587, duration: 5.202s, episode steps: 624, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 381302/1000000: episode: 588, duration: 5.308s, episode steps: 618, steps per second: 116, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [5.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.570196, mean_absolute_error: 0.014940, acc: 0.995138, mean_q: 1.000000\n", " 381919/1000000: episode: 589, duration: 4.981s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 382535/1000000: episode: 590, duration: 5.109s, episode steps: 616, steps per second: 121, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 383161/1000000: episode: 591, duration: 5.124s, episode steps: 626, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 383800/1000000: episode: 592, duration: 5.140s, episode steps: 639, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 384426/1000000: episode: 593, duration: 5.059s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 385050/1000000: episode: 594, duration: 5.107s, episode steps: 624, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 385676/1000000: episode: 595, duration: 5.246s, episode steps: 626, steps per second: 119, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [2.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562898, mean_absolute_error: 0.014763, acc: 0.995200, mean_q: 1.000000\n", " 386292/1000000: episode: 596, duration: 5.130s, episode steps: 616, steps per second: 120, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 386916/1000000: episode: 597, duration: 5.240s, episode steps: 624, steps per second: 119, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 387536/1000000: episode: 598, duration: 5.261s, episode steps: 620, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 388164/1000000: episode: 599, duration: 5.190s, episode steps: 628, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 388786/1000000: episode: 600, duration: 5.254s, episode steps: 622, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 389406/1000000: episode: 601, duration: 5.019s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 390033/1000000: episode: 602, duration: 5.024s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 390660/1000000: episode: 603, duration: 5.017s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 391278/1000000: episode: 604, duration: 4.958s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 391910/1000000: episode: 605, duration: 5.051s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 392522/1000000: episode: 606, duration: 4.908s, episode steps: 612, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.993 [4.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.574174, mean_absolute_error: 0.014535, acc: 0.996727, mean_q: 1.000000\n", " 393146/1000000: episode: 607, duration: 5.037s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 393764/1000000: episode: 608, duration: 5.102s, episode steps: 618, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 394394/1000000: episode: 609, duration: 5.254s, episode steps: 630, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 395020/1000000: episode: 610, duration: 5.135s, episode steps: 626, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 395643/1000000: episode: 611, duration: 5.160s, episode steps: 623, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 396276/1000000: episode: 612, duration: 5.272s, episode steps: 633, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 397118/1000000: episode: 613, duration: 6.843s, episode steps: 842, steps per second: 123, episode reward: 840.000, mean reward: 0.998 [0.000, 400.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.818 [0.000, 228.000], loss: 121.513833, mean_absolute_error: 0.112555, acc: 0.997622, mean_q: 1.000000\n", " 397747/1000000: episode: 614, duration: 5.107s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 398390/1000000: episode: 615, duration: 5.284s, episode steps: 643, steps per second: 122, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000\n", " 399033/1000000: episode: 616, duration: 5.232s, episode steps: 643, steps per second: 123, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000\n", " 399655/1000000: episode: 617, duration: 5.092s, episode steps: 622, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 400282/1000000: episode: 618, duration: 5.108s, episode steps: 627, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 400920/1000000: episode: 619, duration: 5.172s, episode steps: 638, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552295, mean_absolute_error: 0.014505, acc: 0.995290, mean_q: 1.000000\n", " 401544/1000000: episode: 620, duration: 5.020s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 402179/1000000: episode: 621, duration: 5.081s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 402811/1000000: episode: 622, duration: 5.078s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 403433/1000000: episode: 623, duration: 5.001s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 404534/1000000: episode: 624, duration: 8.875s, episode steps: 1101, steps per second: 124, episode reward: 440.000, mean reward: 0.400 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.805 [0.000, 228.000], loss: 20.179224, mean_absolute_error: 0.045913, acc: 0.998182, mean_q: 1.000000\n", " 405166/1000000: episode: 625, duration: 5.131s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 405791/1000000: episode: 626, duration: 5.127s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 406410/1000000: episode: 627, duration: 5.040s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000\n", " 407034/1000000: episode: 628, duration: 5.112s, episode steps: 624, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 407651/1000000: episode: 629, duration: 5.243s, episode steps: 617, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 408282/1000000: episode: 630, duration: 5.259s, episode steps: 631, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 408898/1000000: episode: 631, duration: 4.956s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 409523/1000000: episode: 632, duration: 5.044s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 410149/1000000: episode: 633, duration: 5.032s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000\n", " 410775/1000000: episode: 634, duration: 5.006s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000\n", " 411375/1000000: episode: 635, duration: 4.815s, episode steps: 600, steps per second: 125, episode reward: 130.000, mean reward: 0.217 [0.000, 10.000], mean action: 7.988 [1.000, 8.000], mean observation: 72.878 [0.000, 228.000], loss: 1.085509, mean_absolute_error: 0.025912, acc: 0.996661, mean_q: 1.000000\n", " 412015/1000000: episode: 636, duration: 5.143s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 412646/1000000: episode: 637, duration: 5.128s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 413280/1000000: episode: 638, duration: 5.086s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 413897/1000000: episode: 639, duration: 4.975s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 414530/1000000: episode: 640, duration: 5.086s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 415149/1000000: episode: 641, duration: 4.972s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 415783/1000000: episode: 642, duration: 5.092s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 416408/1000000: episode: 643, duration: 5.025s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 417028/1000000: episode: 644, duration: 5.065s, episode steps: 620, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 417655/1000000: episode: 645, duration: 5.027s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 418296/1000000: episode: 646, duration: 5.158s, episode steps: 641, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 418925/1000000: episode: 647, duration: 5.097s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 419559/1000000: episode: 648, duration: 5.115s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 420187/1000000: episode: 649, duration: 5.061s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 420809/1000000: episode: 650, duration: 5.039s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 421431/1000000: episode: 651, duration: 5.049s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 422059/1000000: episode: 652, duration: 5.067s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 422690/1000000: episode: 653, duration: 5.078s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 423317/1000000: episode: 654, duration: 5.038s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 423949/1000000: episode: 655, duration: 5.085s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 424570/1000000: episode: 656, duration: 5.004s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 425197/1000000: episode: 657, duration: 5.051s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 425829/1000000: episode: 658, duration: 5.077s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 426461/1000000: episode: 659, duration: 5.232s, episode steps: 632, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 427080/1000000: episode: 660, duration: 5.114s, episode steps: 619, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000\n", " 427699/1000000: episode: 661, duration: 5.099s, episode steps: 619, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 428313/1000000: episode: 662, duration: 4.936s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 428951/1000000: episode: 663, duration: 5.102s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.550741, mean_absolute_error: 0.013987, acc: 0.996860, mean_q: 1.000000\n", " 429571/1000000: episode: 664, duration: 4.978s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.981 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.568353, mean_absolute_error: 0.014895, acc: 0.995153, mean_q: 1.000000\n", " 430187/1000000: episode: 665, duration: 4.941s, episode steps: 616, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 430823/1000000: episode: 666, duration: 5.158s, episode steps: 636, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 431454/1000000: episode: 667, duration: 5.098s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 432092/1000000: episode: 668, duration: 5.113s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.976 [0.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.552295, mean_absolute_error: 0.014505, acc: 0.995290, mean_q: 1.000000\n", " 433139/1000000: episode: 669, duration: 8.543s, episode steps: 1047, steps per second: 123, episode reward: 840.000, mean reward: 0.802 [0.000, 400.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.782 [0.000, 228.000], loss: 97.698991, mean_absolute_error: 0.090714, acc: 0.998088, mean_q: 1.000000\n", " 433762/1000000: episode: 670, duration: 5.247s, episode steps: 623, steps per second: 119, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 434388/1000000: episode: 671, duration: 5.190s, episode steps: 626, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 435017/1000000: episode: 672, duration: 5.468s, episode steps: 629, steps per second: 115, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 435642/1000000: episode: 673, duration: 5.275s, episode steps: 625, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 436268/1000000: episode: 674, duration: 5.346s, episode steps: 626, steps per second: 117, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 436903/1000000: episode: 675, duration: 5.504s, episode steps: 635, steps per second: 115, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 437526/1000000: episode: 676, duration: 5.401s, episode steps: 623, steps per second: 115, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000\n", " 438154/1000000: episode: 677, duration: 5.075s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 438766/1000000: episode: 678, duration: 5.008s, episode steps: 612, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000\n", " 439403/1000000: episode: 679, duration: 5.423s, episode steps: 637, steps per second: 117, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 440026/1000000: episode: 680, duration: 5.211s, episode steps: 623, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 440659/1000000: episode: 681, duration: 5.212s, episode steps: 633, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 441296/1000000: episode: 682, duration: 5.468s, episode steps: 637, steps per second: 116, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 441922/1000000: episode: 683, duration: 5.519s, episode steps: 626, steps per second: 113, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 442555/1000000: episode: 684, duration: 5.308s, episode steps: 633, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556664, mean_absolute_error: 0.014611, acc: 0.995253, mean_q: 1.000000\n", " 443189/1000000: episode: 685, duration: 5.236s, episode steps: 634, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 443804/1000000: episode: 686, duration: 5.048s, episode steps: 615, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.881 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 444434/1000000: episode: 687, duration: 5.175s, episode steps: 630, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 445056/1000000: episode: 688, duration: 5.214s, episode steps: 622, steps per second: 119, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 445690/1000000: episode: 689, duration: 5.300s, episode steps: 634, steps per second: 120, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 446309/1000000: episode: 690, duration: 5.086s, episode steps: 619, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 446944/1000000: episode: 691, duration: 5.248s, episode steps: 635, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 447574/1000000: episode: 692, duration: 5.196s, episode steps: 630, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 448196/1000000: episode: 693, duration: 5.435s, episode steps: 622, steps per second: 114, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 448835/1000000: episode: 694, duration: 5.224s, episode steps: 639, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 449466/1000000: episode: 695, duration: 7.247s, episode steps: 631, steps per second: 87, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 450088/1000000: episode: 696, duration: 5.812s, episode steps: 622, steps per second: 107, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 450721/1000000: episode: 697, duration: 5.389s, episode steps: 633, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 451341/1000000: episode: 698, duration: 5.218s, episode steps: 620, steps per second: 119, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 451967/1000000: episode: 699, duration: 5.626s, episode steps: 626, steps per second: 111, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 452592/1000000: episode: 700, duration: 5.315s, episode steps: 625, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 453226/1000000: episode: 701, duration: 5.224s, episode steps: 634, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 453855/1000000: episode: 702, duration: 5.384s, episode steps: 629, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 454483/1000000: episode: 703, duration: 5.517s, episode steps: 628, steps per second: 114, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 455112/1000000: episode: 704, duration: 5.297s, episode steps: 629, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 455737/1000000: episode: 705, duration: 5.286s, episode steps: 625, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 456368/1000000: episode: 706, duration: 5.304s, episode steps: 631, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 456999/1000000: episode: 707, duration: 5.299s, episode steps: 631, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 457628/1000000: episode: 708, duration: 5.653s, episode steps: 629, steps per second: 111, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 458248/1000000: episode: 709, duration: 5.004s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 458874/1000000: episode: 710, duration: 5.035s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 459509/1000000: episode: 711, duration: 5.153s, episode steps: 635, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 460141/1000000: episode: 712, duration: 5.123s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 460765/1000000: episode: 713, duration: 5.023s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 461383/1000000: episode: 714, duration: 4.992s, episode steps: 618, steps per second: 124, episode reward: 160.000, mean reward: 0.259 [0.000, 10.000], mean action: 7.974 [0.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 1.298072, mean_absolute_error: 0.031115, acc: 0.995138, mean_q: 1.000000\n", " 462007/1000000: episode: 715, duration: 5.027s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.979 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564705, mean_absolute_error: 0.014806, acc: 0.995185, mean_q: 1.000000\n", " 462639/1000000: episode: 716, duration: 5.078s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 463271/1000000: episode: 717, duration: 5.090s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 463901/1000000: episode: 718, duration: 5.140s, episode steps: 630, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 464525/1000000: episode: 719, duration: 5.203s, episode steps: 624, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 465147/1000000: episode: 720, duration: 5.054s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 465763/1000000: episode: 721, duration: 4.972s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 466392/1000000: episode: 722, duration: 5.073s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 467022/1000000: episode: 723, duration: 5.072s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 467657/1000000: episode: 724, duration: 5.124s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 468275/1000000: episode: 725, duration: 4.987s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 468912/1000000: episode: 726, duration: 5.153s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 469547/1000000: episode: 727, duration: 5.132s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 470175/1000000: episode: 728, duration: 5.072s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 470800/1000000: episode: 729, duration: 5.283s, episode steps: 625, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 471426/1000000: episode: 730, duration: 5.156s, episode steps: 626, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000\n", " 472038/1000000: episode: 731, duration: 5.015s, episode steps: 612, steps per second: 122, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000\n", " 472668/1000000: episode: 732, duration: 5.274s, episode steps: 630, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 473295/1000000: episode: 733, duration: 5.182s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 473928/1000000: episode: 734, duration: 5.253s, episode steps: 633, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 474554/1000000: episode: 735, duration: 5.069s, episode steps: 626, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 475179/1000000: episode: 736, duration: 5.036s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [3.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563800, mean_absolute_error: 0.014785, acc: 0.995192, mean_q: 1.000000\n", " 475814/1000000: episode: 737, duration: 5.155s, episode steps: 635, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 476440/1000000: episode: 738, duration: 5.554s, episode steps: 626, steps per second: 113, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000\n", " 477069/1000000: episode: 739, duration: 5.082s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 477705/1000000: episode: 740, duration: 5.114s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 478330/1000000: episode: 741, duration: 5.024s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 478961/1000000: episode: 742, duration: 5.128s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 479583/1000000: episode: 743, duration: 4.995s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 480210/1000000: episode: 744, duration: 5.103s, episode steps: 627, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 480832/1000000: episode: 745, duration: 5.027s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 481451/1000000: episode: 746, duration: 5.008s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 482076/1000000: episode: 747, duration: 5.070s, episode steps: 625, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 482914/1000000: episode: 748, duration: 6.783s, episode steps: 838, steps per second: 124, episode reward: 440.000, mean reward: 0.525 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.807 [0.000, 228.000], loss: 26.519873, mean_absolute_error: 0.059991, acc: 0.997611, mean_q: 1.000000\n", " 483546/1000000: episode: 749, duration: 5.091s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000\n", " 484170/1000000: episode: 750, duration: 5.080s, episode steps: 624, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 484804/1000000: episode: 751, duration: 5.445s, episode steps: 634, steps per second: 116, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 485603/1000000: episode: 752, duration: 6.580s, episode steps: 799, steps per second: 121, episode reward: 240.000, mean reward: 0.300 [0.000, 50.000], mean action: 7.991 [1.000, 8.000], mean observation: 72.823 [0.000, 228.000], loss: 2.755802, mean_absolute_error: 0.035024, acc: 0.997494, mean_q: 1.000000\n", " 486227/1000000: episode: 753, duration: 5.027s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 486854/1000000: episode: 754, duration: 5.038s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 487472/1000000: episode: 755, duration: 4.955s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 488095/1000000: episode: 756, duration: 4.988s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 488716/1000000: episode: 757, duration: 4.971s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 489356/1000000: episode: 758, duration: 5.123s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 489983/1000000: episode: 759, duration: 4.979s, episode steps: 627, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 490613/1000000: episode: 760, duration: 5.029s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 491247/1000000: episode: 761, duration: 5.087s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 491865/1000000: episode: 762, duration: 4.961s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 492496/1000000: episode: 763, duration: 5.053s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 493135/1000000: episode: 764, duration: 5.134s, episode steps: 639, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 493763/1000000: episode: 765, duration: 5.051s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 494388/1000000: episode: 766, duration: 4.985s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 495021/1000000: episode: 767, duration: 5.103s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 495635/1000000: episode: 768, duration: 4.944s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.572301, mean_absolute_error: 0.014492, acc: 0.996737, mean_q: 1.000000\n", " 496280/1000000: episode: 769, duration: 5.174s, episode steps: 645, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.543218, mean_absolute_error: 0.013335, acc: 0.998447, mean_q: 1.000000\n", " 496903/1000000: episode: 770, duration: 4.975s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000\n", " 497532/1000000: episode: 771, duration: 5.051s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 498157/1000000: episode: 772, duration: 5.030s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 498792/1000000: episode: 773, duration: 5.071s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 499418/1000000: episode: 774, duration: 5.006s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 500044/1000000: episode: 775, duration: 5.000s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 500676/1000000: episode: 776, duration: 5.045s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 501308/1000000: episode: 777, duration: 5.052s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 501941/1000000: episode: 778, duration: 5.062s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 502562/1000000: episode: 779, duration: 5.006s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 503187/1000000: episode: 780, duration: 4.989s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 503810/1000000: episode: 781, duration: 4.992s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 504454/1000000: episode: 782, duration: 5.189s, episode steps: 644, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.545602, mean_absolute_error: 0.013867, acc: 0.996890, mean_q: 1.000000\n", " 505080/1000000: episode: 783, duration: 5.021s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 505700/1000000: episode: 784, duration: 4.951s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 506334/1000000: episode: 785, duration: 5.073s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 506966/1000000: episode: 786, duration: 5.063s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.555977, mean_absolute_error: 0.014110, acc: 0.996830, mean_q: 1.000000\n", " 507592/1000000: episode: 787, duration: 5.004s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 508215/1000000: episode: 788, duration: 4.968s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 508838/1000000: episode: 789, duration: 4.970s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 509472/1000000: episode: 790, duration: 5.045s, episode steps: 634, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 510094/1000000: episode: 791, duration: 4.990s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 510725/1000000: episode: 792, duration: 5.023s, episode steps: 631, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 511347/1000000: episode: 793, duration: 4.969s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 511968/1000000: episode: 794, duration: 4.963s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 512589/1000000: episode: 795, duration: 4.979s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 513225/1000000: episode: 796, duration: 5.118s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 513848/1000000: episode: 797, duration: 4.989s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 514480/1000000: episode: 798, duration: 5.061s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 515115/1000000: episode: 799, duration: 5.074s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 515757/1000000: episode: 800, duration: 5.140s, episode steps: 642, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000\n", " 516385/1000000: episode: 801, duration: 5.031s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 517014/1000000: episode: 802, duration: 5.030s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 517653/1000000: episode: 803, duration: 5.102s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 518293/1000000: episode: 804, duration: 5.100s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 518916/1000000: episode: 805, duration: 4.983s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 519544/1000000: episode: 806, duration: 5.091s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 520170/1000000: episode: 807, duration: 5.221s, episode steps: 626, steps per second: 120, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 520792/1000000: episode: 808, duration: 5.321s, episode steps: 622, steps per second: 117, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 521414/1000000: episode: 809, duration: 5.384s, episode steps: 622, steps per second: 116, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 522011/1000000: episode: 810, duration: 4.997s, episode steps: 597, steps per second: 119, episode reward: 160.000, mean reward: 0.268 [0.000, 10.000], mean action: 7.977 [0.000, 8.000], mean observation: 72.818 [0.000, 228.000], loss: 1.327197, mean_absolute_error: 0.031803, acc: 0.994966, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 522631/1000000: episode: 811, duration: 5.024s, episode steps: 620, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 523260/1000000: episode: 812, duration: 5.026s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.542868, mean_absolute_error: 0.013822, acc: 0.996815, mean_q: 1.000000\n", " 523888/1000000: episode: 813, duration: 5.030s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 524526/1000000: episode: 814, duration: 5.091s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 525149/1000000: episode: 815, duration: 4.963s, episode steps: 623, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 525784/1000000: episode: 816, duration: 5.072s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 526406/1000000: episode: 817, duration: 4.993s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 527037/1000000: episode: 818, duration: 5.047s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 527666/1000000: episode: 819, duration: 5.011s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 528292/1000000: episode: 820, duration: 4.981s, episode steps: 626, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 528921/1000000: episode: 821, duration: 5.023s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 529554/1000000: episode: 822, duration: 5.044s, episode steps: 633, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 530177/1000000: episode: 823, duration: 4.966s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 530808/1000000: episode: 824, duration: 5.063s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 531439/1000000: episode: 825, duration: 5.051s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 532066/1000000: episode: 826, duration: 5.007s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 532700/1000000: episode: 827, duration: 5.126s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 533326/1000000: episode: 828, duration: 5.029s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.897 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 533961/1000000: episode: 829, duration: 5.088s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 534594/1000000: episode: 830, duration: 5.141s, episode steps: 633, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 535225/1000000: episode: 831, duration: 5.081s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 535857/1000000: episode: 832, duration: 5.200s, episode steps: 632, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 536487/1000000: episode: 833, duration: 5.559s, episode steps: 630, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 537103/1000000: episode: 834, duration: 5.477s, episode steps: 616, steps per second: 112, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 537732/1000000: episode: 835, duration: 5.224s, episode steps: 629, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 538355/1000000: episode: 836, duration: 4.979s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 538986/1000000: episode: 837, duration: 5.085s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 539626/1000000: episode: 838, duration: 5.121s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 540259/1000000: episode: 839, duration: 5.072s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 540902/1000000: episode: 840, duration: 5.179s, episode steps: 643, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000\n", " 541522/1000000: episode: 841, duration: 4.998s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 542162/1000000: episode: 842, duration: 5.154s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 542788/1000000: episode: 843, duration: 5.039s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 543409/1000000: episode: 844, duration: 4.980s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000\n", " 544044/1000000: episode: 845, duration: 5.097s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000\n", " 544673/1000000: episode: 846, duration: 5.049s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 545305/1000000: episode: 847, duration: 5.080s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 545935/1000000: episode: 848, duration: 5.071s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 546560/1000000: episode: 849, duration: 5.017s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 547185/1000000: episode: 850, duration: 5.012s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 547813/1000000: episode: 851, duration: 5.030s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 548440/1000000: episode: 852, duration: 5.040s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 549075/1000000: episode: 853, duration: 5.115s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 549696/1000000: episode: 854, duration: 4.995s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 550323/1000000: episode: 855, duration: 5.043s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 550957/1000000: episode: 856, duration: 5.073s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 551587/1000000: episode: 857, duration: 5.051s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 552216/1000000: episode: 858, duration: 5.054s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 552853/1000000: episode: 859, duration: 5.126s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000\n", " 553479/1000000: episode: 860, duration: 5.045s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 554111/1000000: episode: 861, duration: 5.098s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 554734/1000000: episode: 862, duration: 5.015s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 555357/1000000: episode: 863, duration: 5.028s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000\n", " 555995/1000000: episode: 864, duration: 5.119s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 556622/1000000: episode: 865, duration: 5.017s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 557244/1000000: episode: 866, duration: 4.961s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 557864/1000000: episode: 867, duration: 4.961s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 558497/1000000: episode: 868, duration: 5.068s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 559117/1000000: episode: 869, duration: 4.980s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 559739/1000000: episode: 870, duration: 5.003s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 560372/1000000: episode: 871, duration: 5.070s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 561003/1000000: episode: 872, duration: 5.055s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 561620/1000000: episode: 873, duration: 4.927s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 562239/1000000: episode: 874, duration: 4.966s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 562921/1000000: episode: 875, duration: 5.471s, episode steps: 682, steps per second: 125, episode reward: 90.000, mean reward: 0.132 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.871 [0.000, 228.000], loss: 0.661709, mean_absolute_error: 0.016412, acc: 0.997063, mean_q: 1.000000\n", " 563547/1000000: episode: 876, duration: 5.036s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 564177/1000000: episode: 877, duration: 5.057s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 564800/1000000: episode: 878, duration: 4.981s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000\n", " 565428/1000000: episode: 879, duration: 5.031s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 566061/1000000: episode: 880, duration: 5.101s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 566683/1000000: episode: 881, duration: 5.012s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 567314/1000000: episode: 882, duration: 5.052s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 567929/1000000: episode: 883, duration: 4.944s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 568550/1000000: episode: 884, duration: 4.993s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 569173/1000000: episode: 885, duration: 4.975s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 569801/1000000: episode: 886, duration: 5.064s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 570441/1000000: episode: 887, duration: 5.147s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 571069/1000000: episode: 888, duration: 5.033s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 571694/1000000: episode: 889, duration: 5.027s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 572339/1000000: episode: 890, duration: 5.180s, episode steps: 645, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.543218, mean_absolute_error: 0.013335, acc: 0.998447, mean_q: 1.000000\n", " 572964/1000000: episode: 891, duration: 5.014s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 573589/1000000: episode: 892, duration: 5.026s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 574216/1000000: episode: 893, duration: 5.039s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 574845/1000000: episode: 894, duration: 5.036s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 575476/1000000: episode: 895, duration: 5.058s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 576104/1000000: episode: 896, duration: 5.077s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 576729/1000000: episode: 897, duration: 5.014s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 577353/1000000: episode: 898, duration: 5.034s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563116, mean_absolute_error: 0.014277, acc: 0.996790, mean_q: 1.000000\n", " 577989/1000000: episode: 899, duration: 5.087s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 578783/1000000: episode: 900, duration: 6.370s, episode steps: 794, steps per second: 125, episode reward: 240.000, mean reward: 0.302 [0.000, 50.000], mean action: 7.991 [1.000, 8.000], mean observation: 72.820 [0.000, 228.000], loss: 2.773177, mean_absolute_error: 0.035237, acc: 0.997478, mean_q: 1.000000\n", " 579413/1000000: episode: 901, duration: 5.075s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 580041/1000000: episode: 902, duration: 5.043s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 580668/1000000: episode: 903, duration: 5.026s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 581293/1000000: episode: 904, duration: 5.026s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 581924/1000000: episode: 905, duration: 5.066s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 582557/1000000: episode: 906, duration: 5.069s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 583187/1000000: episode: 907, duration: 5.055s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 583816/1000000: episode: 908, duration: 5.055s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 584455/1000000: episode: 909, duration: 5.131s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 585088/1000000: episode: 910, duration: 5.063s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 585717/1000000: episode: 911, duration: 5.018s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 586353/1000000: episode: 912, duration: 5.094s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 586978/1000000: episode: 913, duration: 5.006s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 587599/1000000: episode: 914, duration: 4.993s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000\n", " 588221/1000000: episode: 915, duration: 4.960s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 588845/1000000: episode: 916, duration: 4.983s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 589476/1000000: episode: 917, duration: 5.070s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 590106/1000000: episode: 918, duration: 5.039s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 590730/1000000: episode: 919, duration: 5.006s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 591352/1000000: episode: 920, duration: 5.023s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 591988/1000000: episode: 921, duration: 5.131s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 592623/1000000: episode: 922, duration: 5.090s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 593248/1000000: episode: 923, duration: 5.038s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 593884/1000000: episode: 924, duration: 5.114s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 594510/1000000: episode: 925, duration: 5.040s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000\n", " 595126/1000000: episode: 926, duration: 4.963s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 595744/1000000: episode: 927, duration: 4.965s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 596380/1000000: episode: 928, duration: 5.090s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 596992/1000000: episode: 929, duration: 4.934s, episode steps: 612, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000\n", " 597614/1000000: episode: 930, duration: 4.983s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 598238/1000000: episode: 931, duration: 5.041s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 598860/1000000: episode: 932, duration: 5.011s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 599495/1000000: episode: 933, duration: 5.073s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 600115/1000000: episode: 934, duration: 4.951s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 600736/1000000: episode: 935, duration: 4.978s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 601366/1000000: episode: 936, duration: 5.066s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 601996/1000000: episode: 937, duration: 5.046s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 602628/1000000: episode: 938, duration: 5.067s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 603242/1000000: episode: 939, duration: 4.923s, episode steps: 614, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 603853/1000000: episode: 940, duration: 4.923s, episode steps: 611, steps per second: 124, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.573493, mean_absolute_error: 0.014016, acc: 0.998361, mean_q: 1.000000\n", " 604474/1000000: episode: 941, duration: 5.024s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 605108/1000000: episode: 942, duration: 5.055s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 605732/1000000: episode: 943, duration: 5.019s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 606360/1000000: episode: 944, duration: 5.042s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 606989/1000000: episode: 945, duration: 5.036s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 607612/1000000: episode: 946, duration: 5.035s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 608232/1000000: episode: 947, duration: 4.964s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 608860/1000000: episode: 948, duration: 5.058s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 609503/1000000: episode: 949, duration: 5.173s, episode steps: 643, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.544910, mean_absolute_error: 0.013373, acc: 0.998442, mean_q: 1.000000\n", " 610128/1000000: episode: 950, duration: 4.999s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 610751/1000000: episode: 951, duration: 4.992s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 611381/1000000: episode: 952, duration: 5.620s, episode steps: 630, steps per second: 112, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 612012/1000000: episode: 953, duration: 5.025s, episode steps: 631, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 612648/1000000: episode: 954, duration: 5.091s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000\n", " 613279/1000000: episode: 955, duration: 5.077s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 613910/1000000: episode: 956, duration: 5.078s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 614534/1000000: episode: 957, duration: 5.026s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 615171/1000000: episode: 958, duration: 5.092s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 615803/1000000: episode: 959, duration: 5.046s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 616422/1000000: episode: 960, duration: 4.958s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 617050/1000000: episode: 961, duration: 5.031s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 617674/1000000: episode: 962, duration: 4.975s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 618295/1000000: episode: 963, duration: 4.955s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 618917/1000000: episode: 964, duration: 4.962s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 619550/1000000: episode: 965, duration: 5.047s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 620184/1000000: episode: 966, duration: 5.058s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 620805/1000000: episode: 967, duration: 4.983s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 621437/1000000: episode: 968, duration: 5.095s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 622068/1000000: episode: 969, duration: 5.060s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 622688/1000000: episode: 970, duration: 4.977s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000\n", " 623296/1000000: episode: 971, duration: 4.917s, episode steps: 608, steps per second: 124, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.576327, mean_absolute_error: 0.014080, acc: 0.998353, mean_q: 1.000000\n", " 623911/1000000: episode: 972, duration: 4.918s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 624535/1000000: episode: 973, duration: 4.987s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 625159/1000000: episode: 974, duration: 4.969s, episode steps: 624, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 625788/1000000: episode: 975, duration: 5.061s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 626409/1000000: episode: 976, duration: 4.973s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 627033/1000000: episode: 977, duration: 5.020s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 627659/1000000: episode: 978, duration: 5.027s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000\n", " 628286/1000000: episode: 979, duration: 5.036s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 628901/1000000: episode: 980, duration: 4.938s, episode steps: 615, steps per second: 125, episode reward: 80.000, mean reward: 0.130 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.898 [0.000, 228.000], loss: 0.652639, mean_absolute_error: 0.016276, acc: 0.996743, mean_q: 1.000000\n", " 629535/1000000: episode: 981, duration: 5.091s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 630171/1000000: episode: 982, duration: 5.070s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 630799/1000000: episode: 983, duration: 5.006s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 631423/1000000: episode: 984, duration: 5.001s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 632059/1000000: episode: 985, duration: 5.085s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.536884, mean_absolute_error: 0.013682, acc: 0.996850, mean_q: 1.000000\n", " 632687/1000000: episode: 986, duration: 5.015s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 633311/1000000: episode: 987, duration: 4.984s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 633941/1000000: episode: 988, duration: 5.064s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 634565/1000000: episode: 989, duration: 4.992s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 635193/1000000: episode: 990, duration: 5.033s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 635824/1000000: episode: 991, duration: 5.062s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [3.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.558431, mean_absolute_error: 0.014654, acc: 0.995238, mean_q: 1.000000\n", " 636452/1000000: episode: 992, duration: 5.045s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 637076/1000000: episode: 993, duration: 4.986s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 637710/1000000: episode: 994, duration: 5.117s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 638335/1000000: episode: 995, duration: 4.996s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 638950/1000000: episode: 996, duration: 4.954s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 639594/1000000: episode: 997, duration: 5.165s, episode steps: 644, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.544062, mean_absolute_error: 0.013354, acc: 0.998445, mean_q: 1.000000\n", " 640222/1000000: episode: 998, duration: 5.001s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 640857/1000000: episode: 999, duration: 5.105s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 641490/1000000: episode: 1000, duration: 5.081s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 642109/1000000: episode: 1001, duration: 4.969s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 642732/1000000: episode: 1002, duration: 4.976s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 643368/1000000: episode: 1003, duration: 5.065s, episode steps: 636, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 643995/1000000: episode: 1004, duration: 5.012s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 644639/1000000: episode: 1005, duration: 5.131s, episode steps: 644, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 7.988 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.545602, mean_absolute_error: 0.013867, acc: 0.996890, mean_q: 1.000000\n", " 645276/1000000: episode: 1006, duration: 5.072s, episode steps: 637, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 645912/1000000: episode: 1007, duration: 5.078s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 646550/1000000: episode: 1008, duration: 5.104s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 647171/1000000: episode: 1009, duration: 4.977s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 647809/1000000: episode: 1010, duration: 5.121s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 648447/1000000: episode: 1011, duration: 5.089s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 649077/1000000: episode: 1012, duration: 5.021s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 649716/1000000: episode: 1013, duration: 5.137s, episode steps: 639, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.549877, mean_absolute_error: 0.013967, acc: 0.996865, mean_q: 1.000000\n", " 650342/1000000: episode: 1014, duration: 5.002s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 650967/1000000: episode: 1015, duration: 4.995s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 651588/1000000: episode: 1016, duration: 4.967s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 652214/1000000: episode: 1017, duration: 4.989s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 652849/1000000: episode: 1018, duration: 5.063s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 653469/1000000: episode: 1019, duration: 4.966s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 654102/1000000: episode: 1020, duration: 5.043s, episode steps: 633, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 654728/1000000: episode: 1021, duration: 5.014s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.561314, mean_absolute_error: 0.014235, acc: 0.996800, mean_q: 1.000000\n", " 655349/1000000: episode: 1022, duration: 4.973s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 655978/1000000: episode: 1023, duration: 5.025s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 656594/1000000: episode: 1024, duration: 4.957s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 657220/1000000: episode: 1025, duration: 4.992s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 657844/1000000: episode: 1026, duration: 4.997s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 658479/1000000: episode: 1027, duration: 5.116s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000\n", " 659111/1000000: episode: 1028, duration: 5.056s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 659740/1000000: episode: 1029, duration: 5.034s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 660359/1000000: episode: 1030, duration: 4.975s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000\n", " 660986/1000000: episode: 1031, duration: 5.027s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 661620/1000000: episode: 1032, duration: 5.053s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 662250/1000000: episode: 1033, duration: 5.028s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 662869/1000000: episode: 1034, duration: 4.966s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 663488/1000000: episode: 1035, duration: 4.949s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 664113/1000000: episode: 1036, duration: 5.011s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 664734/1000000: episode: 1037, duration: 4.999s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 665358/1000000: episode: 1038, duration: 5.013s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 665982/1000000: episode: 1039, duration: 5.004s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 666622/1000000: episode: 1040, duration: 5.179s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 667247/1000000: episode: 1041, duration: 5.026s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 667874/1000000: episode: 1042, duration: 5.063s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 668495/1000000: episode: 1043, duration: 5.010s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 669120/1000000: episode: 1044, duration: 5.028s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 669741/1000000: episode: 1045, duration: 4.991s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 670371/1000000: episode: 1046, duration: 5.040s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 670988/1000000: episode: 1047, duration: 4.963s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 671609/1000000: episode: 1048, duration: 4.985s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 672232/1000000: episode: 1049, duration: 4.990s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 672858/1000000: episode: 1050, duration: 5.005s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 673492/1000000: episode: 1051, duration: 5.092s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 674137/1000000: episode: 1052, duration: 5.148s, episode steps: 645, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.543218, mean_absolute_error: 0.013335, acc: 0.998447, mean_q: 1.000000\n", " 674949/1000000: episode: 1053, duration: 6.523s, episode steps: 812, steps per second: 124, episode reward: 440.000, mean reward: 0.542 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.797 [0.000, 228.000], loss: 27.370077, mean_absolute_error: 0.061878, acc: 0.997534, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 675569/1000000: episode: 1054, duration: 4.960s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 676194/1000000: episode: 1055, duration: 4.994s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 676818/1000000: episode: 1056, duration: 5.039s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 677443/1000000: episode: 1057, duration: 4.990s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 678069/1000000: episode: 1058, duration: 5.033s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 678699/1000000: episode: 1059, duration: 5.059s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 679331/1000000: episode: 1060, duration: 5.027s, episode steps: 632, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 679954/1000000: episode: 1061, duration: 4.979s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 680587/1000000: episode: 1062, duration: 5.067s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 681219/1000000: episode: 1063, duration: 5.054s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 681836/1000000: episode: 1064, duration: 4.948s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 682465/1000000: episode: 1065, duration: 5.052s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 683091/1000000: episode: 1066, duration: 5.011s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 683713/1000000: episode: 1067, duration: 4.989s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 684338/1000000: episode: 1068, duration: 4.988s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 684962/1000000: episode: 1069, duration: 5.025s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 685598/1000000: episode: 1070, duration: 5.106s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 686223/1000000: episode: 1071, duration: 5.054s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 686860/1000000: episode: 1072, duration: 5.114s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 687479/1000000: episode: 1073, duration: 4.976s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 688101/1000000: episode: 1074, duration: 5.006s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 688740/1000000: episode: 1075, duration: 5.107s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 689373/1000000: episode: 1076, duration: 5.084s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 690000/1000000: episode: 1077, duration: 5.026s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 690628/1000000: episode: 1078, duration: 5.013s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 691250/1000000: episode: 1079, duration: 4.983s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 691875/1000000: episode: 1080, duration: 5.027s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 692507/1000000: episode: 1081, duration: 5.074s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 693135/1000000: episode: 1082, duration: 5.048s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 693763/1000000: episode: 1083, duration: 5.067s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 694378/1000000: episode: 1084, duration: 4.939s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 695018/1000000: episode: 1085, duration: 5.128s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 695646/1000000: episode: 1086, duration: 5.019s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 696281/1000000: episode: 1087, duration: 5.113s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 696915/1000000: episode: 1088, duration: 5.114s, episode steps: 634, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 697532/1000000: episode: 1089, duration: 4.957s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 698159/1000000: episode: 1090, duration: 5.036s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 698784/1000000: episode: 1091, duration: 5.029s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 699406/1000000: episode: 1092, duration: 5.009s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 700033/1000000: episode: 1093, duration: 5.026s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 700655/1000000: episode: 1094, duration: 5.002s, episode steps: 622, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 701289/1000000: episode: 1095, duration: 5.078s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 701918/1000000: episode: 1096, duration: 5.048s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 702559/1000000: episode: 1097, duration: 5.135s, episode steps: 641, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 703189/1000000: episode: 1098, duration: 5.025s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 703817/1000000: episode: 1099, duration: 5.010s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 704457/1000000: episode: 1100, duration: 5.113s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 705078/1000000: episode: 1101, duration: 4.957s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 705703/1000000: episode: 1102, duration: 5.016s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 706342/1000000: episode: 1103, duration: 5.127s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 706957/1000000: episode: 1104, duration: 4.908s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 707573/1000000: episode: 1105, duration: 4.949s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 708187/1000000: episode: 1106, duration: 4.925s, episode steps: 614, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 708810/1000000: episode: 1107, duration: 4.985s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 709440/1000000: episode: 1108, duration: 5.094s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 710051/1000000: episode: 1109, duration: 4.879s, episode steps: 611, steps per second: 125, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.573493, mean_absolute_error: 0.014016, acc: 0.998361, mean_q: 1.000000\n", " 710684/1000000: episode: 1110, duration: 5.085s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 711315/1000000: episode: 1111, duration: 5.095s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 711937/1000000: episode: 1112, duration: 4.971s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 712555/1000000: episode: 1113, duration: 4.966s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 713194/1000000: episode: 1114, duration: 5.107s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 713820/1000000: episode: 1115, duration: 5.005s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 714455/1000000: episode: 1116, duration: 5.123s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 715085/1000000: episode: 1117, duration: 5.022s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 715714/1000000: episode: 1118, duration: 5.035s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 716355/1000000: episode: 1119, duration: 5.134s, episode steps: 641, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 716978/1000000: episode: 1120, duration: 5.009s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000\n", " 717613/1000000: episode: 1121, duration: 5.122s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 718248/1000000: episode: 1122, duration: 5.107s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 718883/1000000: episode: 1123, duration: 5.130s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 719503/1000000: episode: 1124, duration: 4.982s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 720131/1000000: episode: 1125, duration: 5.044s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 720755/1000000: episode: 1126, duration: 5.006s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 721379/1000000: episode: 1127, duration: 4.996s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 722005/1000000: episode: 1128, duration: 5.011s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 722630/1000000: episode: 1129, duration: 5.001s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 723239/1000000: episode: 1130, duration: 4.874s, episode steps: 609, steps per second: 125, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.575379, mean_absolute_error: 0.014059, acc: 0.998355, mean_q: 1.000000\n", " 723879/1000000: episode: 1131, duration: 5.103s, episode steps: 640, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 724504/1000000: episode: 1132, duration: 5.008s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 725126/1000000: episode: 1133, duration: 4.992s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 725755/1000000: episode: 1134, duration: 5.058s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 726376/1000000: episode: 1135, duration: 4.989s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 727005/1000000: episode: 1136, duration: 5.060s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 727624/1000000: episode: 1137, duration: 4.941s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 728251/1000000: episode: 1138, duration: 5.058s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 728884/1000000: episode: 1139, duration: 5.041s, episode steps: 633, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 729507/1000000: episode: 1140, duration: 5.015s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 730132/1000000: episode: 1141, duration: 4.998s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 730763/1000000: episode: 1142, duration: 5.061s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 731395/1000000: episode: 1143, duration: 5.076s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 732021/1000000: episode: 1144, duration: 4.994s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 732644/1000000: episode: 1145, duration: 4.988s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 733265/1000000: episode: 1146, duration: 4.955s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 733887/1000000: episode: 1147, duration: 4.952s, episode steps: 622, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 734493/1000000: episode: 1148, duration: 4.828s, episode steps: 606, steps per second: 126, episode reward: 70.000, mean reward: 0.116 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.579868, mean_absolute_error: 0.014669, acc: 0.996694, mean_q: 1.000000\n", " 735116/1000000: episode: 1149, duration: 4.972s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 735758/1000000: episode: 1150, duration: 5.120s, episode steps: 642, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000\n", " 736382/1000000: episode: 1151, duration: 4.987s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 737017/1000000: episode: 1152, duration: 5.076s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553346, mean_absolute_error: 0.014048, acc: 0.996845, mean_q: 1.000000\n", " 737635/1000000: episode: 1153, duration: 4.942s, episode steps: 618, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 738257/1000000: episode: 1154, duration: 4.946s, episode steps: 622, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 738886/1000000: episode: 1155, duration: 5.050s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 739516/1000000: episode: 1156, duration: 5.036s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 740148/1000000: episode: 1157, duration: 5.051s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 740759/1000000: episode: 1158, duration: 4.870s, episode steps: 611, steps per second: 125, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.573493, mean_absolute_error: 0.014016, acc: 0.998361, mean_q: 1.000000\n", " 741388/1000000: episode: 1159, duration: 5.011s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 742012/1000000: episode: 1160, duration: 4.999s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 742635/1000000: episode: 1161, duration: 4.973s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564021, mean_absolute_error: 0.014298, acc: 0.996785, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 743272/1000000: episode: 1162, duration: 5.071s, episode steps: 637, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 743909/1000000: episode: 1163, duration: 5.088s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 744537/1000000: episode: 1164, duration: 4.997s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 745167/1000000: episode: 1165, duration: 5.020s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 745792/1000000: episode: 1166, duration: 4.989s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 746412/1000000: episode: 1167, duration: 4.938s, episode steps: 620, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 747033/1000000: episode: 1168, duration: 4.943s, episode steps: 621, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 747656/1000000: episode: 1169, duration: 4.981s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 748290/1000000: episode: 1170, duration: 5.036s, episode steps: 634, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 748925/1000000: episode: 1171, duration: 5.094s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 749548/1000000: episode: 1172, duration: 5.009s, episode steps: 623, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 750174/1000000: episode: 1173, duration: 5.003s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 750798/1000000: episode: 1174, duration: 4.993s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 751440/1000000: episode: 1175, duration: 5.129s, episode steps: 642, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.545760, mean_absolute_error: 0.013392, acc: 0.998440, mean_q: 1.000000\n", " 752077/1000000: episode: 1176, duration: 5.114s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 752699/1000000: episode: 1177, duration: 4.975s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 753316/1000000: episode: 1178, duration: 4.935s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 753944/1000000: episode: 1179, duration: 5.035s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 754573/1000000: episode: 1180, duration: 5.035s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 755200/1000000: episode: 1181, duration: 4.999s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 755815/1000000: episode: 1182, duration: 4.931s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 756444/1000000: episode: 1183, duration: 5.046s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 757069/1000000: episode: 1184, duration: 4.997s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 757691/1000000: episode: 1185, duration: 4.979s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 758318/1000000: episode: 1186, duration: 5.001s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 758932/1000000: episode: 1187, duration: 4.940s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.881 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 759553/1000000: episode: 1188, duration: 4.984s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 760178/1000000: episode: 1189, duration: 5.025s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 760806/1000000: episode: 1190, duration: 5.051s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 761430/1000000: episode: 1191, duration: 5.019s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 762059/1000000: episode: 1192, duration: 5.084s, episode steps: 629, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 762684/1000000: episode: 1193, duration: 5.018s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 763316/1000000: episode: 1194, duration: 5.077s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 763951/1000000: episode: 1195, duration: 5.106s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 764573/1000000: episode: 1196, duration: 4.962s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 765199/1000000: episode: 1197, duration: 5.014s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 765837/1000000: episode: 1198, duration: 5.137s, episode steps: 638, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 766457/1000000: episode: 1199, duration: 4.949s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.989 [1.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000\n", " 767082/1000000: episode: 1200, duration: 4.973s, episode steps: 625, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 767708/1000000: episode: 1201, duration: 4.977s, episode steps: 626, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 768338/1000000: episode: 1202, duration: 5.005s, episode steps: 630, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 768960/1000000: episode: 1203, duration: 4.989s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 769584/1000000: episode: 1204, duration: 5.016s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 770209/1000000: episode: 1205, duration: 5.003s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 770838/1000000: episode: 1206, duration: 5.026s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 771466/1000000: episode: 1207, duration: 10.561s, episode steps: 628, steps per second: 59, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 772090/1000000: episode: 1208, duration: 7.410s, episode steps: 624, steps per second: 84, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 772718/1000000: episode: 1209, duration: 18.837s, episode steps: 628, steps per second: 33, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 773335/1000000: episode: 1210, duration: 18.701s, episode steps: 617, steps per second: 33, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 773954/1000000: episode: 1211, duration: 15.716s, episode steps: 619, steps per second: 39, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.567671, mean_absolute_error: 0.014383, acc: 0.996764, mean_q: 1.000000\n", " 774581/1000000: episode: 1212, duration: 13.118s, episode steps: 627, steps per second: 48, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 775217/1000000: episode: 1213, duration: 13.156s, episode steps: 636, steps per second: 48, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 775842/1000000: episode: 1214, duration: 13.056s, episode steps: 625, steps per second: 48, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 776472/1000000: episode: 1215, duration: 14.213s, episode steps: 630, steps per second: 44, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 777092/1000000: episode: 1216, duration: 8.248s, episode steps: 620, steps per second: 75, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 777725/1000000: episode: 1217, duration: 9.787s, episode steps: 633, steps per second: 65, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555097, mean_absolute_error: 0.014089, acc: 0.996835, mean_q: 1.000000\n", " 778354/1000000: episode: 1218, duration: 18.938s, episode steps: 629, steps per second: 33, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 778985/1000000: episode: 1219, duration: 18.334s, episode steps: 631, steps per second: 34, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 779615/1000000: episode: 1220, duration: 5.101s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 780240/1000000: episode: 1221, duration: 11.245s, episode steps: 625, steps per second: 56, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 780873/1000000: episode: 1222, duration: 19.039s, episode steps: 633, steps per second: 33, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 781496/1000000: episode: 1223, duration: 16.403s, episode steps: 623, steps per second: 38, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 782122/1000000: episode: 1224, duration: 5.365s, episode steps: 626, steps per second: 117, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 782755/1000000: episode: 1225, duration: 5.567s, episode steps: 633, steps per second: 114, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 783378/1000000: episode: 1226, duration: 5.556s, episode steps: 623, steps per second: 112, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 784015/1000000: episode: 1227, duration: 5.116s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 784643/1000000: episode: 1228, duration: 5.030s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 785270/1000000: episode: 1229, duration: 5.104s, episode steps: 627, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 785890/1000000: episode: 1230, duration: 5.019s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 786522/1000000: episode: 1231, duration: 5.076s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 787140/1000000: episode: 1232, duration: 4.983s, episode steps: 618, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 787759/1000000: episode: 1233, duration: 4.996s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 788383/1000000: episode: 1234, duration: 5.010s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 789016/1000000: episode: 1235, duration: 5.109s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 789644/1000000: episode: 1236, duration: 5.024s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 790276/1000000: episode: 1237, duration: 5.041s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 790914/1000000: episode: 1238, duration: 5.111s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 791551/1000000: episode: 1239, duration: 5.124s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.883 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 792185/1000000: episode: 1240, duration: 5.071s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 792811/1000000: episode: 1241, duration: 5.042s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 793441/1000000: episode: 1242, duration: 5.035s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 794072/1000000: episode: 1243, duration: 5.097s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 794703/1000000: episode: 1244, duration: 5.041s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 795335/1000000: episode: 1245, duration: 5.059s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 795959/1000000: episode: 1246, duration: 4.999s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 796584/1000000: episode: 1247, duration: 5.022s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 797214/1000000: episode: 1248, duration: 5.036s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 797824/1000000: episode: 1249, duration: 4.923s, episode steps: 610, steps per second: 124, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.574434, mean_absolute_error: 0.014038, acc: 0.998358, mean_q: 1.000000\n", " 798454/1000000: episode: 1250, duration: 5.066s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 799084/1000000: episode: 1251, duration: 5.035s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 799712/1000000: episode: 1252, duration: 5.059s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 800338/1000000: episode: 1253, duration: 5.021s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 800954/1000000: episode: 1254, duration: 4.950s, episode steps: 616, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.568830, mean_absolute_error: 0.013911, acc: 0.998374, mean_q: 1.000000\n", " 801573/1000000: episode: 1255, duration: 4.976s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 802194/1000000: episode: 1256, duration: 4.943s, episode steps: 621, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 802817/1000000: episode: 1257, duration: 4.992s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 803449/1000000: episode: 1258, duration: 5.041s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 804079/1000000: episode: 1259, duration: 5.048s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 804711/1000000: episode: 1260, duration: 5.040s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 805334/1000000: episode: 1261, duration: 4.981s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 805960/1000000: episode: 1262, duration: 5.004s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 806591/1000000: episode: 1263, duration: 5.039s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 807219/1000000: episode: 1264, duration: 5.020s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 807841/1000000: episode: 1265, duration: 4.996s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 808465/1000000: episode: 1266, duration: 4.988s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 809077/1000000: episode: 1267, duration: 4.905s, episode steps: 612, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000\n", " 809711/1000000: episode: 1268, duration: 5.065s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 810340/1000000: episode: 1269, duration: 5.044s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 810971/1000000: episode: 1270, duration: 5.041s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 811606/1000000: episode: 1271, duration: 5.086s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 812244/1000000: episode: 1272, duration: 5.397s, episode steps: 638, steps per second: 118, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 812863/1000000: episode: 1273, duration: 5.089s, episode steps: 619, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 813493/1000000: episode: 1274, duration: 5.050s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 814124/1000000: episode: 1275, duration: 5.076s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.572732, mean_absolute_error: 0.014134, acc: 0.996825, mean_q: 1.000000\n", " 814747/1000000: episode: 1276, duration: 4.996s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 815364/1000000: episode: 1277, duration: 4.961s, episode steps: 617, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.569514, mean_absolute_error: 0.014426, acc: 0.996753, mean_q: 1.000000\n", " 815988/1000000: episode: 1278, duration: 4.998s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 816618/1000000: episode: 1279, duration: 5.080s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 817250/1000000: episode: 1280, duration: 5.058s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 817863/1000000: episode: 1281, duration: 4.897s, episode steps: 613, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.571619, mean_absolute_error: 0.013974, acc: 0.998366, mean_q: 1.000000\n", " 818484/1000000: episode: 1282, duration: 4.960s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 819113/1000000: episode: 1283, duration: 5.048s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 819746/1000000: episode: 1284, duration: 5.073s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 820365/1000000: episode: 1285, duration: 4.967s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 820989/1000000: episode: 1286, duration: 5.030s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 821603/1000000: episode: 1287, duration: 4.936s, episode steps: 614, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 822220/1000000: episode: 1288, duration: 4.923s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.569514, mean_absolute_error: 0.014426, acc: 0.996753, mean_q: 1.000000\n", " 822839/1000000: episode: 1289, duration: 5.028s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 823471/1000000: episode: 1290, duration: 5.103s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 824108/1000000: episode: 1291, duration: 5.098s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 824729/1000000: episode: 1292, duration: 4.983s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 825348/1000000: episode: 1293, duration: 4.937s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.882 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 825972/1000000: episode: 1294, duration: 5.000s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 826596/1000000: episode: 1295, duration: 4.988s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 827223/1000000: episode: 1296, duration: 5.040s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 827845/1000000: episode: 1297, duration: 4.984s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 828480/1000000: episode: 1298, duration: 5.125s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 829114/1000000: episode: 1299, duration: 5.066s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 829749/1000000: episode: 1300, duration: 5.105s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 830378/1000000: episode: 1301, duration: 5.002s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 831016/1000000: episode: 1302, duration: 5.073s, episode steps: 638, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 831638/1000000: episode: 1303, duration: 4.965s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 832267/1000000: episode: 1304, duration: 5.010s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 832899/1000000: episode: 1305, duration: 5.025s, episode steps: 632, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 833540/1000000: episode: 1306, duration: 5.166s, episode steps: 641, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 834172/1000000: episode: 1307, duration: 5.047s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 834799/1000000: episode: 1308, duration: 5.030s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 835440/1000000: episode: 1309, duration: 5.091s, episode steps: 641, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 836068/1000000: episode: 1310, duration: 5.007s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 836695/1000000: episode: 1311, duration: 5.019s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 837323/1000000: episode: 1312, duration: 5.011s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 837949/1000000: episode: 1313, duration: 4.996s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 838576/1000000: episode: 1314, duration: 5.012s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 839204/1000000: episode: 1315, duration: 5.027s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 839836/1000000: episode: 1316, duration: 5.036s, episode steps: 632, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 840459/1000000: episode: 1317, duration: 4.958s, episode steps: 623, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 841088/1000000: episode: 1318, duration: 4.997s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 841705/1000000: episode: 1319, duration: 4.919s, episode steps: 617, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 842341/1000000: episode: 1320, duration: 5.055s, episode steps: 636, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 842979/1000000: episode: 1321, duration: 5.075s, episode steps: 638, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.986 [2.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.552295, mean_absolute_error: 0.014505, acc: 0.995290, mean_q: 1.000000\n", " 843604/1000000: episode: 1322, duration: 4.994s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 844232/1000000: episode: 1323, duration: 5.010s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 844868/1000000: episode: 1324, duration: 5.056s, episode steps: 636, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 845492/1000000: episode: 1325, duration: 5.029s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 846124/1000000: episode: 1326, duration: 5.048s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 846753/1000000: episode: 1327, duration: 5.005s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 847385/1000000: episode: 1328, duration: 5.043s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 848015/1000000: episode: 1329, duration: 5.041s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 848638/1000000: episode: 1330, duration: 4.965s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 849259/1000000: episode: 1331, duration: 4.940s, episode steps: 621, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 849892/1000000: episode: 1332, duration: 5.047s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 850503/1000000: episode: 1333, duration: 4.853s, episode steps: 611, steps per second: 126, episode reward: 70.000, mean reward: 0.115 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.573493, mean_absolute_error: 0.014016, acc: 0.998361, mean_q: 1.000000\n", " 851128/1000000: episode: 1334, duration: 4.979s, episode steps: 625, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 851757/1000000: episode: 1335, duration: 5.021s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 852393/1000000: episode: 1336, duration: 5.068s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 853019/1000000: episode: 1337, duration: 5.005s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 853647/1000000: episode: 1338, duration: 5.060s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 854268/1000000: episode: 1339, duration: 4.992s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 854889/1000000: episode: 1340, duration: 4.947s, episode steps: 621, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 855515/1000000: episode: 1341, duration: 4.991s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 856145/1000000: episode: 1342, duration: 5.031s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 856779/1000000: episode: 1343, duration: 5.085s, episode steps: 634, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 857404/1000000: episode: 1344, duration: 5.002s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.562213, mean_absolute_error: 0.014256, acc: 0.996795, mean_q: 1.000000\n", " 858026/1000000: episode: 1345, duration: 4.987s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 858654/1000000: episode: 1346, duration: 5.018s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 859269/1000000: episode: 1347, duration: 4.921s, episode steps: 615, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 859883/1000000: episode: 1348, duration: 4.911s, episode steps: 614, steps per second: 125, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 860508/1000000: episode: 1349, duration: 4.996s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 861144/1000000: episode: 1350, duration: 5.068s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 861773/1000000: episode: 1351, duration: 5.021s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 862414/1000000: episode: 1352, duration: 5.109s, episode steps: 641, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 863035/1000000: episode: 1353, duration: 4.971s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 863661/1000000: episode: 1354, duration: 4.982s, episode steps: 626, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 864286/1000000: episode: 1355, duration: 4.999s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 864917/1000000: episode: 1356, duration: 5.035s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 865537/1000000: episode: 1357, duration: 5.037s, episode steps: 620, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 866164/1000000: episode: 1358, duration: 5.046s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 866794/1000000: episode: 1359, duration: 5.041s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 867423/1000000: episode: 1360, duration: 5.035s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 868051/1000000: episode: 1361, duration: 5.001s, episode steps: 628, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 868686/1000000: episode: 1362, duration: 5.080s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 869323/1000000: episode: 1363, duration: 5.110s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 869948/1000000: episode: 1364, duration: 4.978s, episode steps: 625, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 870569/1000000: episode: 1365, duration: 4.954s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 871198/1000000: episode: 1366, duration: 5.022s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.981 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560209, mean_absolute_error: 0.014697, acc: 0.995223, mean_q: 1.000000\n", " 871829/1000000: episode: 1367, duration: 5.055s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 872455/1000000: episode: 1368, duration: 5.007s, episode steps: 626, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 873084/1000000: episode: 1369, duration: 5.023s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 873711/1000000: episode: 1370, duration: 5.000s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.997 [6.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 874347/1000000: episode: 1371, duration: 5.074s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 874971/1000000: episode: 1372, duration: 4.968s, episode steps: 624, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 875595/1000000: episode: 1373, duration: 4.968s, episode steps: 624, steps per second: 126, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 876233/1000000: episode: 1374, duration: 5.097s, episode steps: 638, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 876856/1000000: episode: 1375, duration: 4.969s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 877491/1000000: episode: 1376, duration: 5.064s, episode steps: 635, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 878111/1000000: episode: 1377, duration: 4.951s, episode steps: 620, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 878750/1000000: episode: 1378, duration: 5.080s, episode steps: 639, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 879369/1000000: episode: 1379, duration: 4.942s, episode steps: 619, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 880000/1000000: episode: 1380, duration: 5.031s, episode steps: 631, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 880624/1000000: episode: 1381, duration: 5.004s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 881260/1000000: episode: 1382, duration: 5.058s, episode steps: 636, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 881901/1000000: episode: 1383, duration: 5.073s, episode steps: 641, steps per second: 126, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 882525/1000000: episode: 1384, duration: 5.003s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 883438/1000000: episode: 1385, duration: 7.262s, episode steps: 913, steps per second: 126, episode reward: 840.000, mean reward: 0.920 [0.000, 400.000], mean action: 7.992 [1.000, 8.000], mean observation: 72.786 [0.000, 228.000], loss: 112.053879, mean_absolute_error: 0.103879, acc: 0.997807, mean_q: 1.000000\n", " 884059/1000000: episode: 1386, duration: 4.934s, episode steps: 621, steps per second: 126, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 884687/1000000: episode: 1387, duration: 5.032s, episode steps: 628, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 885317/1000000: episode: 1388, duration: 5.021s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 885947/1000000: episode: 1389, duration: 5.020s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 886580/1000000: episode: 1390, duration: 5.070s, episode steps: 633, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 887202/1000000: episode: 1391, duration: 5.495s, episode steps: 622, steps per second: 113, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564929, mean_absolute_error: 0.014319, acc: 0.996779, mean_q: 1.000000\n", " 887825/1000000: episode: 1392, duration: 4.980s, episode steps: 623, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 888458/1000000: episode: 1393, duration: 5.028s, episode steps: 633, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 889097/1000000: episode: 1394, duration: 5.060s, episode steps: 639, steps per second: 126, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 890126/1000000: episode: 1395, duration: 8.212s, episode steps: 1029, steps per second: 125, episode reward: 440.000, mean reward: 0.428 [0.000, 200.000], mean action: 7.993 [1.000, 8.000], mean observation: 72.770 [0.000, 228.000], loss: 21.592552, mean_absolute_error: 0.049051, acc: 0.998054, mean_q: 1.000000\n", " 890748/1000000: episode: 1396, duration: 4.987s, episode steps: 622, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 891387/1000000: episode: 1397, duration: 5.105s, episode steps: 639, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 892032/1000000: episode: 1398, duration: 5.147s, episode steps: 645, steps per second: 125, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.543218, mean_absolute_error: 0.013335, acc: 0.998447, mean_q: 1.000000\n", " 892661/1000000: episode: 1399, duration: 4.993s, episode steps: 629, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 893282/1000000: episode: 1400, duration: 4.966s, episode steps: 621, steps per second: 125, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 893906/1000000: episode: 1401, duration: 4.983s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 894537/1000000: episode: 1402, duration: 5.028s, episode steps: 631, steps per second: 126, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 895161/1000000: episode: 1403, duration: 5.001s, episode steps: 624, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 895786/1000000: episode: 1404, duration: 4.993s, episode steps: 625, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 896418/1000000: episode: 1405, duration: 5.059s, episode steps: 632, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 897047/1000000: episode: 1406, duration: 5.022s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 897677/1000000: episode: 1407, duration: 5.045s, episode steps: 630, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 898307/1000000: episode: 1408, duration: 5.084s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 898944/1000000: episode: 1409, duration: 5.097s, episode steps: 637, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000\n", " 899564/1000000: episode: 1410, duration: 4.985s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 900197/1000000: episode: 1411, duration: 5.603s, episode steps: 633, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 900828/1000000: episode: 1412, duration: 5.294s, episode steps: 631, steps per second: 119, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 901464/1000000: episode: 1413, duration: 5.259s, episode steps: 636, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 902100/1000000: episode: 1414, duration: 5.287s, episode steps: 636, steps per second: 120, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000\n", " 902722/1000000: episode: 1415, duration: 5.427s, episode steps: 622, steps per second: 115, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 903344/1000000: episode: 1416, duration: 5.294s, episode steps: 622, steps per second: 117, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 903972/1000000: episode: 1417, duration: 5.573s, episode steps: 628, steps per second: 113, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 904599/1000000: episode: 1418, duration: 5.044s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 905228/1000000: episode: 1419, duration: 5.013s, episode steps: 629, steps per second: 125, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 905864/1000000: episode: 1420, duration: 5.101s, episode steps: 636, steps per second: 125, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552475, mean_absolute_error: 0.014028, acc: 0.996850, mean_q: 1.000000\n", " 906491/1000000: episode: 1421, duration: 5.031s, episode steps: 627, steps per second: 125, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 907119/1000000: episode: 1422, duration: 8.164s, episode steps: 628, steps per second: 77, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 907751/1000000: episode: 1423, duration: 5.771s, episode steps: 632, steps per second: 110, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 908375/1000000: episode: 1424, duration: 42.594s, episode steps: 624, steps per second: 15, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 909009/1000000: episode: 1425, duration: 8.766s, episode steps: 634, steps per second: 72, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 909645/1000000: episode: 1426, duration: 5.533s, episode steps: 636, steps per second: 115, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 910273/1000000: episode: 1427, duration: 5.192s, episode steps: 628, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 910885/1000000: episode: 1428, duration: 4.955s, episode steps: 612, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000\n", " 911506/1000000: episode: 1429, duration: 5.088s, episode steps: 621, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000\n", " 912142/1000000: episode: 1430, duration: 5.168s, episode steps: 636, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 912771/1000000: episode: 1431, duration: 5.651s, episode steps: 629, steps per second: 111, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 913394/1000000: episode: 1432, duration: 5.878s, episode steps: 623, steps per second: 106, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 914013/1000000: episode: 1433, duration: 5.492s, episode steps: 619, steps per second: 113, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 914626/1000000: episode: 1434, duration: 4.973s, episode steps: 613, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.571619, mean_absolute_error: 0.013974, acc: 0.998366, mean_q: 1.000000\n", " 915244/1000000: episode: 1435, duration: 5.005s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 915859/1000000: episode: 1436, duration: 5.006s, episode steps: 615, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 916482/1000000: episode: 1437, duration: 5.262s, episode steps: 623, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n", " 917121/1000000: episode: 1438, duration: 5.430s, episode steps: 639, steps per second: 118, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.549877, mean_absolute_error: 0.013967, acc: 0.996865, mean_q: 1.000000\n", " 917746/1000000: episode: 1439, duration: 5.107s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 918373/1000000: episode: 1440, duration: 5.110s, episode steps: 627, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 919004/1000000: episode: 1441, duration: 5.103s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 919632/1000000: episode: 1442, duration: 5.094s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 920265/1000000: episode: 1443, duration: 5.116s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 920891/1000000: episode: 1444, duration: 5.066s, episode steps: 626, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 921511/1000000: episode: 1445, duration: 5.087s, episode steps: 620, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.998 [7.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.566754, mean_absolute_error: 0.014362, acc: 0.996769, mean_q: 1.000000\n", " 922132/1000000: episode: 1446, duration: 5.247s, episode steps: 621, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 922757/1000000: episode: 1447, duration: 5.114s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 923385/1000000: episode: 1448, duration: 5.237s, episode steps: 628, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 924022/1000000: episode: 1449, duration: 5.251s, episode steps: 637, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000\n", " 924654/1000000: episode: 1450, duration: 5.176s, episode steps: 632, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 925477/1000000: episode: 1451, duration: 6.716s, episode steps: 823, steps per second: 123, episode reward: 240.000, mean reward: 0.292 [0.000, 50.000], mean action: 7.991 [1.000, 8.000], mean observation: 72.800 [0.000, 228.000], loss: 2.675342, mean_absolute_error: 0.034034, acc: 0.997567, mean_q: 1.000000\n", " 926117/1000000: episode: 1452, duration: 5.475s, episode steps: 640, steps per second: 117, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 926738/1000000: episode: 1453, duration: 5.389s, episode steps: 621, steps per second: 115, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 927373/1000000: episode: 1454, duration: 5.585s, episode steps: 635, steps per second: 114, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 928007/1000000: episode: 1455, duration: 5.203s, episode steps: 634, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 928637/1000000: episode: 1456, duration: 5.373s, episode steps: 630, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 929266/1000000: episode: 1457, duration: 5.329s, episode steps: 629, steps per second: 118, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 929896/1000000: episode: 1458, duration: 5.095s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 930530/1000000: episode: 1459, duration: 5.135s, episode steps: 634, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 931161/1000000: episode: 1460, duration: 5.116s, episode steps: 631, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.995 [5.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.556859, mean_absolute_error: 0.014131, acc: 0.996825, mean_q: 1.000000\n", " 931783/1000000: episode: 1461, duration: 5.121s, episode steps: 622, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 932405/1000000: episode: 1462, duration: 5.046s, episode steps: 622, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 933029/1000000: episode: 1463, duration: 5.058s, episode steps: 624, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 933665/1000000: episode: 1464, duration: 5.144s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 934290/1000000: episode: 1465, duration: 5.046s, episode steps: 625, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 934920/1000000: episode: 1466, duration: 5.092s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 935540/1000000: episode: 1467, duration: 5.011s, episode steps: 620, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 936177/1000000: episode: 1468, duration: 5.193s, episode steps: 637, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.551606, mean_absolute_error: 0.014008, acc: 0.996855, mean_q: 1.000000\n", " 936801/1000000: episode: 1469, duration: 5.278s, episode steps: 624, steps per second: 118, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 937415/1000000: episode: 1470, duration: 4.973s, episode steps: 614, steps per second: 123, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.570686, mean_absolute_error: 0.013953, acc: 0.998369, mean_q: 1.000000\n", " 938042/1000000: episode: 1471, duration: 5.071s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 7.992 [3.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560417, mean_absolute_error: 0.014214, acc: 0.996805, mean_q: 1.000000\n", " 938673/1000000: episode: 1472, duration: 5.109s, episode steps: 631, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 939307/1000000: episode: 1473, duration: 5.135s, episode steps: 634, steps per second: 123, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.552657, mean_absolute_error: 0.013547, acc: 0.998420, mean_q: 1.000000\n", " 939929/1000000: episode: 1474, duration: 5.260s, episode steps: 622, steps per second: 118, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 940787/1000000: episode: 1475, duration: 7.108s, episode steps: 858, steps per second: 121, episode reward: 440.000, mean reward: 0.513 [0.000, 200.000], mean action: 7.998 [6.000, 8.000], mean observation: 72.816 [0.000, 228.000], loss: 25.900974, mean_absolute_error: 0.058617, acc: 0.997666, mean_q: 1.000000\n", " 941407/1000000: episode: 1476, duration: 5.145s, episode steps: 620, steps per second: 120, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 942034/1000000: episode: 1477, duration: 5.198s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 942666/1000000: episode: 1478, duration: 5.215s, episode steps: 632, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 943288/1000000: episode: 1479, duration: 5.114s, episode steps: 622, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 943908/1000000: episode: 1480, duration: 5.107s, episode steps: 620, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 944549/1000000: episode: 1481, duration: 5.290s, episode steps: 641, steps per second: 121, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.546612, mean_absolute_error: 0.013411, acc: 0.998437, mean_q: 1.000000\n", " 945171/1000000: episode: 1482, duration: 5.111s, episode steps: 622, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 945802/1000000: episode: 1483, duration: 5.186s, episode steps: 631, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 946441/1000000: episode: 1484, duration: 5.245s, episode steps: 639, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 947064/1000000: episode: 1485, duration: 5.128s, episode steps: 623, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 947684/1000000: episode: 1486, duration: 5.095s, episode steps: 620, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.565155, mean_absolute_error: 0.013829, acc: 0.998384, mean_q: 1.000000\n", " 948309/1000000: episode: 1487, duration: 5.144s, episode steps: 625, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 948927/1000000: episode: 1488, duration: 5.026s, episode steps: 618, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 949555/1000000: episode: 1489, duration: 5.095s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 950183/1000000: episode: 1490, duration: 5.107s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 950796/1000000: episode: 1491, duration: 4.963s, episode steps: 613, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.571619, mean_absolute_error: 0.013974, acc: 0.998366, mean_q: 1.000000\n", " 951424/1000000: episode: 1492, duration: 5.080s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 952061/1000000: episode: 1493, duration: 5.157s, episode steps: 637, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 952697/1000000: episode: 1494, duration: 5.138s, episode steps: 636, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 953327/1000000: episode: 1495, duration: 5.098s, episode steps: 630, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 953960/1000000: episode: 1496, duration: 5.113s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 954588/1000000: episode: 1497, duration: 5.081s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 955207/1000000: episode: 1498, duration: 5.007s, episode steps: 619, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 955822/1000000: episode: 1499, duration: 4.979s, episode steps: 615, steps per second: 124, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.569757, mean_absolute_error: 0.013932, acc: 0.998371, mean_q: 1.000000\n", " 956443/1000000: episode: 1500, duration: 5.126s, episode steps: 621, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 957074/1000000: episode: 1501, duration: 5.222s, episode steps: 631, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 957711/1000000: episode: 1502, duration: 5.283s, episode steps: 637, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.550050, mean_absolute_error: 0.013489, acc: 0.998428, mean_q: 1.000000\n", " 958328/1000000: episode: 1503, duration: 5.092s, episode steps: 617, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.569514, mean_absolute_error: 0.014426, acc: 0.996753, mean_q: 1.000000\n", " 958956/1000000: episode: 1504, duration: 5.188s, episode steps: 628, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.990 [2.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.559523, mean_absolute_error: 0.014193, acc: 0.996810, mean_q: 1.000000\n", " 959580/1000000: episode: 1505, duration: 5.167s, episode steps: 624, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 960216/1000000: episode: 1506, duration: 5.244s, episode steps: 636, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.550916, mean_absolute_error: 0.013508, acc: 0.998425, mean_q: 1.000000\n", " 960838/1000000: episode: 1507, duration: 5.127s, episode steps: 622, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 961471/1000000: episode: 1508, duration: 5.215s, episode steps: 633, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 962105/1000000: episode: 1509, duration: 5.234s, episode steps: 634, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 7.991 [2.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.554220, mean_absolute_error: 0.014069, acc: 0.996840, mean_q: 1.000000\n", " 962723/1000000: episode: 1510, duration: 5.115s, episode steps: 618, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566987, mean_absolute_error: 0.013870, acc: 0.998379, mean_q: 1.000000\n", " 963350/1000000: episode: 1511, duration: 5.193s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 963973/1000000: episode: 1512, duration: 5.141s, episode steps: 623, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.562429, mean_absolute_error: 0.013767, acc: 0.998392, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 964595/1000000: episode: 1513, duration: 5.168s, episode steps: 622, steps per second: 120, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 965223/1000000: episode: 1514, duration: 5.175s, episode steps: 628, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 965852/1000000: episode: 1515, duration: 5.163s, episode steps: 629, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 966479/1000000: episode: 1516, duration: 5.169s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 967108/1000000: episode: 1517, duration: 5.147s, episode steps: 629, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.558633, mean_absolute_error: 0.014172, acc: 0.996815, mean_q: 1.000000\n", " 967748/1000000: episode: 1518, duration: 5.173s, episode steps: 640, steps per second: 124, episode reward: 70.000, mean reward: 0.109 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.547468, mean_absolute_error: 0.013431, acc: 0.998435, mean_q: 1.000000\n", " 968378/1000000: episode: 1519, duration: 5.378s, episode steps: 630, steps per second: 117, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 969013/1000000: episode: 1520, duration: 5.241s, episode steps: 635, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 969630/1000000: episode: 1521, duration: 5.096s, episode steps: 617, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 970255/1000000: episode: 1522, duration: 5.176s, episode steps: 625, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 970880/1000000: episode: 1523, duration: 5.147s, episode steps: 625, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.560627, mean_absolute_error: 0.013727, acc: 0.998397, mean_q: 1.000000\n", " 971518/1000000: episode: 1524, duration: 5.242s, episode steps: 638, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 972135/1000000: episode: 1525, duration: 5.084s, episode steps: 617, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 972754/1000000: episode: 1526, duration: 5.090s, episode steps: 619, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 973384/1000000: episode: 1527, duration: 5.171s, episode steps: 630, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.556171, mean_absolute_error: 0.013627, acc: 0.998410, mean_q: 1.000000\n", " 974008/1000000: episode: 1528, duration: 5.156s, episode steps: 624, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 974635/1000000: episode: 1529, duration: 5.168s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 975261/1000000: episode: 1530, duration: 5.171s, episode steps: 626, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 975882/1000000: episode: 1531, duration: 5.120s, episode steps: 621, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 976504/1000000: episode: 1532, duration: 5.134s, episode steps: 622, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.563335, mean_absolute_error: 0.013788, acc: 0.998390, mean_q: 1.000000\n", " 977142/1000000: episode: 1533, duration: 5.247s, episode steps: 638, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 977768/1000000: episode: 1534, duration: 5.139s, episode steps: 626, steps per second: 122, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 978395/1000000: episode: 1535, duration: 5.171s, episode steps: 627, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 979019/1000000: episode: 1536, duration: 5.160s, episode steps: 624, steps per second: 121, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 979649/1000000: episode: 1537, duration: 5.191s, episode steps: 630, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 7.994 [4.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557745, mean_absolute_error: 0.014151, acc: 0.996820, mean_q: 1.000000\n", " 980261/1000000: episode: 1538, duration: 5.061s, episode steps: 612, steps per second: 121, episode reward: 70.000, mean reward: 0.114 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.572554, mean_absolute_error: 0.013995, acc: 0.998363, mean_q: 1.000000\n", " 980880/1000000: episode: 1539, duration: 5.077s, episode steps: 619, steps per second: 122, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 981501/1000000: episode: 1540, duration: 5.114s, episode steps: 621, steps per second: 121, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 7.987 [0.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.565840, mean_absolute_error: 0.014341, acc: 0.996774, mean_q: 1.000000\n", " 982139/1000000: episode: 1541, duration: 5.248s, episode steps: 638, steps per second: 122, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.549186, mean_absolute_error: 0.013469, acc: 0.998430, mean_q: 1.000000\n", " 982768/1000000: episode: 1542, duration: 5.177s, episode steps: 629, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.888 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 983407/1000000: episode: 1543, duration: 5.293s, episode steps: 639, steps per second: 121, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.548326, mean_absolute_error: 0.013450, acc: 0.998433, mean_q: 1.000000\n", " 984038/1000000: episode: 1544, duration: 5.189s, episode steps: 631, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.555288, mean_absolute_error: 0.013607, acc: 0.998413, mean_q: 1.000000\n", " 984670/1000000: episode: 1545, duration: 5.240s, episode steps: 632, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 985291/1000000: episode: 1546, duration: 5.565s, episode steps: 621, steps per second: 112, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 985920/1000000: episode: 1547, duration: 5.219s, episode steps: 629, steps per second: 121, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 986553/1000000: episode: 1548, duration: 5.261s, episode steps: 633, steps per second: 120, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.895 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 987181/1000000: episode: 1549, duration: 5.098s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 987813/1000000: episode: 1550, duration: 5.126s, episode steps: 632, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n", " 988434/1000000: episode: 1551, duration: 5.033s, episode steps: 621, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 989062/1000000: episode: 1552, duration: 5.072s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.884 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 989697/1000000: episode: 1553, duration: 5.121s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 990316/1000000: episode: 1554, duration: 5.016s, episode steps: 619, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.566069, mean_absolute_error: 0.013849, acc: 0.998382, mean_q: 1.000000\n", " 990949/1000000: episode: 1555, duration: 5.125s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", " 991577/1000000: episode: 1556, duration: 5.152s, episode steps: 628, steps per second: 122, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 992198/1000000: episode: 1557, duration: 5.175s, episode steps: 621, steps per second: 120, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 992815/1000000: episode: 1558, duration: 5.014s, episode steps: 617, steps per second: 123, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.890 [0.000, 228.000], loss: 0.567907, mean_absolute_error: 0.013891, acc: 0.998377, mean_q: 1.000000\n", " 993443/1000000: episode: 1559, duration: 5.096s, episode steps: 628, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 994072/1000000: episode: 1560, duration: 5.097s, episode steps: 629, steps per second: 123, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.887 [0.000, 228.000], loss: 0.557056, mean_absolute_error: 0.013646, acc: 0.998408, mean_q: 1.000000\n", " 994696/1000000: episode: 1561, duration: 5.043s, episode steps: 624, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.889 [0.000, 228.000], loss: 0.561527, mean_absolute_error: 0.013747, acc: 0.998395, mean_q: 1.000000\n", " 995317/1000000: episode: 1562, duration: 5.024s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 995952/1000000: episode: 1563, duration: 5.129s, episode steps: 635, steps per second: 124, episode reward: 70.000, mean reward: 0.110 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.551785, mean_absolute_error: 0.013528, acc: 0.998423, mean_q: 1.000000\n", " 996573/1000000: episode: 1564, duration: 5.026s, episode steps: 621, steps per second: 124, episode reward: 70.000, mean reward: 0.113 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.886 [0.000, 228.000], loss: 0.564243, mean_absolute_error: 0.013808, acc: 0.998387, mean_q: 1.000000\n", " 997199/1000000: episode: 1565, duration: 5.080s, episode steps: 626, steps per second: 123, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.559730, mean_absolute_error: 0.013707, acc: 0.998400, mean_q: 1.000000\n", " 997831/1000000: episode: 1566, duration: 5.106s, episode steps: 632, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.554408, mean_absolute_error: 0.013587, acc: 0.998415, mean_q: 1.000000\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 998458/1000000: episode: 1567, duration: 5.075s, episode steps: 627, steps per second: 124, episode reward: 70.000, mean reward: 0.112 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.558836, mean_absolute_error: 0.013687, acc: 0.998403, mean_q: 1.000000\n", " 999086/1000000: episode: 1568, duration: 5.077s, episode steps: 628, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.891 [0.000, 228.000], loss: 0.557945, mean_absolute_error: 0.013666, acc: 0.998405, mean_q: 1.000000\n", " 999719/1000000: episode: 1569, duration: 5.117s, episode steps: 633, steps per second: 124, episode reward: 70.000, mean reward: 0.111 [0.000, 10.000], mean action: 8.000 [8.000, 8.000], mean observation: 72.894 [0.000, 228.000], loss: 0.553531, mean_absolute_error: 0.013567, acc: 0.998418, mean_q: 1.000000\n", "done, took 8629.718 seconds\n", "Testing for 10 episodes ...\n", "Episode 1: reward: 70.000, steps: 628\n", "Episode 2: reward: 70.000, steps: 638\n", "Episode 3: reward: 70.000, steps: 628\n", "Episode 4: reward: 70.000, steps: 630\n", "Episode 5: reward: 70.000, steps: 625\n", "Episode 6: reward: 70.000, steps: 630\n", "Episode 7: reward: 70.000, steps: 618\n", "Episode 8: reward: 70.000, steps: 634\n", "Episode 9: reward: 70.000, steps: 624\n", "Episode 10: reward: 70.000, steps: 633\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sarsa = SARSAAgent(model, nb_actions, \n", " policy=policy, test_policy=None, \n", " gamma=0.99, nb_steps_warmup=10, \n", " train_interval=1)\n", "sarsa.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])\n", "sarsa.fit(env, nb_steps=1000000, visualize=True, verbose=2)\n", "sarsa.test(env, nb_episodes=10, visualize=True)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJztnXl0FFX2x78vCSFAICGBMOwEQVZxQBCRRQZBBVRwQXFcEFHG0VHwx6CI+zYu44iiuIKCyggIDqDIJqIICsjiwr7vEAKERSAJ6bzfH7ffqerqbuika+m8vp9zcrrqVaff7a6qW/fdd9+9QkoJhmEYRl8SvBaAYRiGcRZW9AzDMJrDip5hGEZzWNEzDMNoDit6hmEYzWFFzzAMozms6BmGYTSHFT3DMIzmsKJnGIbRnCSvBQCAatWqyQYNGngtxrk5cgSoUgVIiomfjXGaPXuAnBzgT38CatcOPLZqFZCVBdSp441sDANg5cqVh6SU1c/1vpjQWA0aNMCKFSu8FuPs7NtHN3uXLsD333stDeMWFSoAt98OvPKK0bZ2LdCyJdCvH/Dmm+7Kc8cdQEICMH68u/0yMYkQYmck74sJRV8myM+n1127vJWDcZfHHwcuvjiwbe5ceu3SxX15tm8HkpPd75cp07CPPlJSUuh1yBBv5WDc4733gNxcoEePwHafj1579nRfpqIidh0yJYYVfaRICWRkAJmZXkvCuMWSJcD77wN5eYHtStGfOOG+TEuXAvPmud8vU6ZhRR8ptWoB995Lr0x84PMBp08DI0cGtwPB7QwTo7CijxQhgBdf5InYeEIpdPWqKCpyXxaGiQJW9JGyZw+5b777zmtJGLcIp+j//nd6LS52Vx4AaNGCIoEYpgTwrE6kHDtGr7t3eysH4x6VK9Or1YLPygLOOy/4AeAGLVrwiIIpMWzRR4q6uYTwVg7GPT78EGjYMFihL14MbN3qjcJdtAjYscP9fpkyDVv0kcJWVHwyciRQs2Zg2/Tp9Hrbbe7Lk5wM3HKL+/0yZRpW9JGirLrXX/dWDsY9nnqKFsoNGhTY7vNRKoyrr3ZfpqIiIDHR/X6ZMg27biIlKQmoXx9IS/NaEsYtliwBJk6kiXgzRUXA8ePB7W6wbx8wbpz7/TJlGlb0kdKmDTBggDc3N+MNPh+wdy9w//3B7QDwt7+5LxPDlAJW9CVh/Hhg/nyvpWDc4lxx9F5E3TBMKWBFHykrV1JCs99+81oSxi2UIrdOxL/4IqUn9mKCvk8f9/tkyjznVPRCiA+FEAeFEGtMbRlCiPlCiM3+16r+diGEGC2E2CKE+E0I0cZJ4V0lN5derXlPGH1R0TZWyz0zE8jO9saib9wYqFjR/X7dJj+fI91sJBKLfjyAqyxtIwAskFI2BrDAvw8APQE09v8NBvCOPWLGAHzRxR9TpwKdOgUr9BkzgB9+8EbRT5kCnDrlfr9uU6EC0Lev11JowznDK6WUi4QQDSzNfQB09W9PAPAdgEf87R9LKSWApUKIdCFETSnlfrsE9gx1U3OK2Pji4YeDF8lNmkSvw4e7L8/u3cATT7jfrxfMmuW1BNpQWh99DZPyPgCghn+7NgBzjoA9/rayj7Lop03zVg7GPQYNorkZa7y8zwc0bQpcc4278hQXU74ljqNnSkjUk7F+612W9P+EEIOFECuEECtylf87lklNpTwj8eAfZYilS8lNs25dYHtREaUhWLvWXXnUqPLpp/WP+MnOBu6+22sptKG0ij5HCFETAPyvB/3tewHUNb2vjr8tCCnl+1LKtlLKttWrn7O2rfdceSUtPVfL3xn98fmAX36h9RPW9vx891MgmOeJdJ8z2rYN+OADr6XQhtIq+pkA1NU/AMAMU/sd/uibSwAc08I/r5g9G5gzx2spGLcIF0cfLuzSacz96WzRS0musTFjvJZEGyIJr/wMwE8Amggh9gghBgF4CUAPIcRmAN39+wDwNYBtALYA+ADAfY5I7QVffklL4veGHKAwOqIUq1Whf/YZ0L27+8q2UiXgkUdCy6QTPh+wcSPw7LNeS6INkUTdhEuVd3mI90oA94d4b9nnwAF6LSjwVg7GPZo1I1+8VaFXrgxUq0YL6NwkIcEoZamzolff7eDBs7+PiRheGRsp6uKTJZ53ZsoqX38N3HRTsKL/4AMKsXRb2Z44QX2npZHS1xWdH2IewUHhkaIuPs5eGV88+CBw9Ghg26efAuXLA2+84a4sJ04Aa9YA770HpKe727ebsKK3HY3NAptRVh3XjI0funcHfvwR6N07sL2oiFbMup2PXilA3Rft6Txa8Qj+RSOlRg3gkkuAcuW8loRxi5UrgW++oUl4Mz4fhf8tXuyuPErRDxqkd+3iKlWAnj05gZuNaG4a2MgttwD791NpObeH7Iw3+HzAvHm0MMpch6CoCNi+Hbj+encnDM1zBboHBXz9tdcSaAVb9CVh+XKOo48nwoVXchy9s+zZQ2mgp0zxWhJtYEUfKW+/DUyeDJw547UkjFuEWzC1fDlw333uK9smTaj4DaD3hGVBAa1XGTLEa0m0gRV9pOz3L/AtLvZWDsY9Onak6BqrQi9XjtLouq3oExIohh/Q26JXDzG1doWJGlb0keLVcJ3xjm+/Bf7+92Cl+swzwCefuK9sd+4E/v1vsuxTU93t2034HrMdnoyNFHXxqZWJTHxw993AVZa6O598Qj7k5593V5b9+ymj5tdfAw0butu3m7Citx226CPF5yMravlyryVh3ODMGbKcFy+mzKVmioqAli0pBNBN4qX4TZUqXkugHazoI6VxY1pAw8QHRUXApk1kQc+cGXhMxdHPmOFuSgxl6V5xBS3k0pXsbFqR3KWL15Jog+amgY3cey9V9unTh25wRm+UUp08mSJdzArd5yNLf/FioLDQvUV08RRHz2tVbIUt+pKweTMwf77XUjBuoJRq+fL0ao62MrtO3JyQNfelc9TNkiWUU2rRIq8l0QZW9JHy8MMU8cATRfGBUqTJyYH7AKUnfuWV4Han6dED+Okn2tb5OiwoAI4fB/r181oSbWBFHymqrq3OlhRjkJQE9OoFNGpE+9bzrgp0u61wverXTTgfve2woo8UdaMXF/OiqXggLQ2YNQvo25f2zYp+8GBg6tTgdqdZuRIYMQL4y1+A2rXd69dtdH6IeQQr+khRF1/Llqzo44n+/YHvvzd89QDF0WdnU7taqeoG27bRIq7Ro4HWrd3r121Y0dsOK/pIKSqi0nK//65/HDNDaYBr1SKfeJcuwROw9epRu5tpq+Mljr5uXa8l0A5W9JHSvj3FLzPxQWEhrURds4YqSp0+bRwrKqJ0BJ9+Cpw86Z5MytJt1owKlOtK69bA668DF17otSTawIo+UoYNAy69FOjQgUq6MXqjlOr33wO3326UE5SS/n78kdrVJL2bMgH6Z1EdMgT45RevpdAGVvQlISeHVkoWFnotCeM04cIrfT4gM9OoHezmZKzZZaOzH/uTTyi6aOtWryXRBlb0kXLjjbQsG9D7JmOIcIo+KQk4dAh45BHad/NauO02o9KVzmG+RUUU8NC5M5Cf77U0WqD5rI6NqKE7wIo+HqhSBbj5ZqBaNdoPF0fvtsKNpzj6/fvJRZWS4q08GsAWfaTEy/JzhqhfH5g0ieZkAEP5nDoFXHcdMHs27bt5LcyaBQwcCAwYALRo4V6/bmN+iOn8QHMRtugjRV1wHTroH97GGPTsSQuV6tWj/fx8YPp04OmngVWrjJWzbrBxI9UsPnrUmCPQEVb0tsMWfaQUFVGukR9/5OIj8cDSpeS++eUXoE0bw32gLPjMTAoDrFDBPZmU0pNSbwV44YXG763z93QRVvSR0qtXcAEKRl8KCymMdvt24J13gnMd7d8PvPuuu3VNVd9VqwKvvupev27TpYvhNlNzEkxUsKKPlCeeAFq1ohQImzZ5LQ3jNEqprlsH3HcfZaw0t2/aRPVk3QwBNFu3Os8TFRbS6HnJEiAry2tptIAVfUn44w9g7VqakGP0Jlx4pRBAgwZARkZguxukpgI1a9K2zi6Nl18GKlXinFI2woo+Ulq1opWQgN7WFEOEU/S1apE7569/DWx3g4ceAvbto4eNztegeoi1akWJ3JioiUrRCyEeEkKsFUKsEUJ8JoRIEUJkCyGWCSG2CCEmCyGS7RLWUwoKjGgbna0phqhdG7j7biMdsPWcexnPnpSk9zWovtu6dTx6tolSK3ohRG0ADwJoK6VsCSARQH8ALwMYJaVsBCAPwCA7BPWcoiIjVa3ONxlDtGwJfPABFYUHDAt61y4qEr94cWC7G3zwAXD99VTtTOfC2RxeaTvRBoQnAagghDgDoCKA/QC6AfCPazEBwNMA3omyH+/x+YA//YlCv6pU8Voaxi3at6eJ1zp1aP/4cWDBAlq0tHmzu6G269ZR31984V6fXsCK3nZKbdFLKfcCeBXALpCCPwZgJYCjUkp1dvYA0KMUTlER0LYtMG8ecMEFXkvDOM3UqeQL37aNrHoVL68s+NRUWixVsaJ7MhUVkcvo6FEKDNCVHj2Ajh1pmxW9LUTjuqkKoA+AbAC1AFQCcFUJ/n+wEGKFEGJFrpupXkvLwIHA5Zd7LQXjFkrB5OZSUXgVRqna8/KofeNG92Ty+cg/36gRlRTUlauuAt54g+o/uFnBS2OimYztDmC7lDJXSnkGwBcAOgJIF0Iol1AdAHtD/bOU8n0pZVspZdvq1atHIYZLPPcc0Lw5hdYtXOi1NIzTKMv9wAHyia9bF9h+6BC1//abezIpi173ydhjxyjX0Ny5euf0cZFoFP0uAJcIISoKIQSAywGsA7AQwI3+9wwAMCM6EWOE/HyKvNm5U+9hM0OEC68sX55cd5mZtO+mwq1RA2jShBS9zuGVw4bpXRPXA0o9GSulXCaEmApgFYAiAKsBvA9gFoBJQojn/W3j7BDUc6pXB9q1o22dbzKGMCt28/6FF5IVv3lzYLsbPPccvdavr7dFX1REefcbNADGjwe6dvVYoLJPVFE3UsqnADxlad4G4OJoPjcm8fk4vDKeaN4cGDrUWAFrPedqTYUXD33dXTfqu+3c6W5NXo3hfLuRwnH08UX79vSnJluVQl++nCqNPfYY7bt5LTz5JE0KDxtmLOTSEQ6vtB1W9JGiaoVef73eNxlDFBZSdaOGDSlTZXo6teflAcuW0f7+/e7mhV+/niaFJ050r08vYEVvO5zrJhKKi+mvXj1g2jSqZcnozbvvUqz88eO0UM6ajz45mdrdzkeflEQPmJwc9/p1m9tvB4YPp21W9LbAij5SRo5kBR9PKIV+8iS5TFasoH2leAoLqf2nn9yTSSn6Xr2AwYPd69dt+vSh+ZHrr+ciPzbBij4SEhKAF16gCbrMTIoEYPRGKfr8fIp2WbkysN3no/Zly9yTScXRJybqbenu2UMjaB492wb76COhuJgWyOTnA0eOcEa9eCBceGXVqlT5SPnm3Yy6Of98oyaCziG+d95J95pKHMdEDVv0kXDiBC1WGTuW9nW+yRhCWczWSKuuXalucJMmge1u8MYbwLhx+lv0RUVkWPHo2TZY0UeC1brT+SZjiE6dqHykdWWsQuWj9yqOXmdjo6gIKFeOR882wq6bSAhn3TH6ctll9HfiBO0rxTpjBk3Mz5oV2O4Gd9xB2TIffJAya+qKz2dEOfG9Zgus6CNBXWwVK1Ieck60pD/HjwOnT1Nx6j/+MCz7I0eMBGfmdjfYtIni92+4wb0+vaCoiBW9zbCijwRltVWsyD7DeOFf/wJGjaJEdpUqGe1K8SQlBba7gQqv3LWL5FLVr3RjxAhASpqMZUVvC+yjj4QqVYAXX6TCI0x84PORH764mIpyz51rtAOkcB9+GPjyS/dkUuGV//gHcPPN7vXrNjfcQLH0PHq2DVb0kZCWRlZGq1Zk1T/5pNcSMU6jinwIAbz+urEwSlmYiYnAW28BP/zgvky6R92sWQPs3Uuj5969vZZGC1jRR0JhISWTOnmSbrYzZ7yWiHEaZT0LQQvmlGKtW5cqH5Uv777Cbd+e0iTrHnVz3XVG0jjGFljRR8KmTVS+bfZs/a0phlCuG4BelWLt04fcOFWquK9wx46l0WQ8pClOSqJcQ48/7rU0WsCTsZFgHq7rbk0xxHXXUcoLIFDRm/Hqoa+7saEUPY+ebYMVfSSYJ+B0v8kYont3+gPovBcX0/ZbbwGvvEIVpsqVo+gQt7j0Usr9MngwPYh0JV5q47oIK/pIMFv0gwdz9E08cOAA5Vtp0IBi6tUCpaNHgd276VrYv99dmXbsoCiULl3c7ddt1KQzK3rbYEUfCWaL/uWXvZWFcYeHH6aImu3bA1ehmh/6bqMU4PbtQG4ucLF+FTsBAO+8Q5Pen3/Oit4meDI2ErKzacjerBn5DNlvqD/mydhhw4CPPzbahaC/kSONRHduoFwar76qd9jhDTfQQ+yee/QfvbgEK/pIqFkTuP9+sjIaNaILkNEbZT0DwGefGfHy5gfA558D337rvky6zxMtWkSjln/9S++FYS7Cij4Sjh8Hfv2VMulx1E18oKxnIDDqpkUL4MYbg9vdoHdvWrSnu+/68suBDz6g9SuFhV5LowWs6CNhyRLgz38Gfv9d/5uMIcLF0d96K1n41nY3mDgRuOsu/Y0NNXJp0QIYONBrabSAJ2MjwRpHz4pef+69l0ZywNnj6L1QuDq7boqLKWQ1HlxULsKKPhI4jj7+uPJKYzstzahFMHw48MUXlBKjShUjna7TSAlUr07RQLfeCrRr506/bmPODspGlW2woo8E88V3zz1UN5TRm82bSbmefz6wapXR/scfRjESN2uaFhcDhw9TbH/LlvSnI6zoHYEVfSSYXTcPPOCtLIw7/P3vVHhkyZLAdnM0jpuYR5Xbt1P+pR49KOGaTiQnAzNnAk2bAlOmsKK3CVb0kdC+PaVMrVULOHaMLL30dK+lYpzErNAffZTO9yOPBEbjPP00KVo30labLd3Jk0mmU6eAChWc79tNkpKAa66h7bvucr+4i6awoo+E7Gz6A6hodPnywIIF3srEOIsqUA0A8+cDNWqQojdH4yxcSAun3FT0iYmGFa9j5E1+Pv3erVoB993ntTTaoNm4zyEOHCB/bEGB/qFtDBEuvLJzZ+Cmm2jbzWshKYmKg7doYcilo1vjyBHg2mspFfTx40BentcSaQFb9JEwcybwt79R1ZukJLI6GL0Jp+jvvtt4j5sRWBUrAhMm0PbmzYaMumF2Ud1wA82TuDnprSlRKXohRDqAsQBaApAA7gKwEcBkAA0A7ABwk5SybD+WzRcfh1fGB88+SxODQHjL3cs4ekDP65DvNUeI1nXzBoA5UsqmAC4EsB7ACAALpJSNASzw75dtuPBI/HHllcBf/kLbNWoAGRm0feONQJs2tJ2VBVSr5o48e/bQ3NBHH9Fk5dy5egYE8OJERyi1RS+ESAPQBcCdACClLARQKIToA6Cr/20TAHwH4JFohPQcc2jbgAFUO5bRm59/poiP5s0peZmisNAoNqJcKW5QVER9C0HJ9erWda9vN+E4ekeIxqLPBpAL4CMhxGohxFghRCUANaSUqiLDAQA1Qv2zEGKwEGKFEGJFbm5uFGK4gNnKuOkmzr8RDwwYQOGTVsy+ezcxX4O7dgFTp9LiLd2oX5+imbp2ZUVvI9Eo+iQAbQC8I6VsDeAkLG4aKaUE+e6DkFK+L6VsK6VsW7169SjEcIG+fYFp02i5e24uVRhi9MYcL//MM8CDD9K2WdG//LJ7KavNo8offgD69QP27XOnbzepVImUfI0alOph6FCvJdKCaCZj9wDYI6Vc5t+fClL0OUKImlLK/UKImgAORiuk5zRuTH8AMGQIsGIFrUxk9MWs0FeuNB7u5oVUv/5KLh43iJc4+kOHKI7+ssv0rovrMqW26KWUBwDsFkI08TddDmAdgJkABvjbBgCYEZWEscCmTcDs2eSb5UiA+CBceGXfvmRNW9udpmpVKn7TuLHeUTebNwN//SulBD90CNi502uJtCDaOPoHAEwUQiQD2AZgIOjhMUUIMQjATgA3RdmH93zyCfDCC5RYiqNu4oNwiv7++433uKnoa9WicpaAofx0VPTmydgRI4A5cyjiiImKqBS9lPIXAG1DHLo8ms+NOczDdbbo44OPPjJCJ82Tgvn55DpJTnZ3srC42EjLoK5FHQ0OjrpxBE6BEAlFRcbNxRZ9fNC9O1UVA4A6dYxcR5ddRkv0AbKyzzvPHXmWLqU4+nnzgEsvBX78kTI86gYvmHIEToEQCWaLvn9/qlDP6M3cuaTgW7QAXn3VaDe7dJ591j15zFE3GRlAhw7u9e0mVouejSpbYIs+Esyhdl27UvpURm/69wfefz+43at89OaomwMHyLW0f//Z/6cscumlFMnUogW7bmyEFX0k3Hcf8L//0faBAxQRwOiN+eH+738DffoEt7/1VmDJQScxW/SbNpGxsX69O327SVoa0LYtkJoKXH898J//eC2RFrCij4QmTciSB4BRo9h1Ew+YXTRbt5KPXLUri37HDvcyK1p91+Y2ndi6lUZSeXlAx47A4MFeS6QFrOgjYflyYNYs2uYJovggXHjl3/5GlibgrmshO5sKn9SubTxodLwOf/6ZfuOcHODgQVqUJkMurmdKAE/GRsLbb1P+jZ07eYIoXjBb7uZzPmSI8R434+ibNAFeeom2Dx40ZNQN88jlvfeoepfZXcaUClb0kWC96aWkuGbdCjMzBt98A9SrR9vmUVxuLuU8qlzZeABISVklnaSggLKmpqXpbdFbo25UGyv6qGBNFQnmC01n/yhj0LUr0LAhbdevD7RuTdutWxuJturUAdq1c8e1MGMGkJkJbNwInH8+8NtvwOV6rUsEED9zES7Dij4SzAumrrmG8pCzhaEvPh/w3/8C69bR/pAhwKJFxjF17u+5h+Zv3BjZKTdNYiJQoQJwwQVAlSrO9+s24Sx6JipY0UeC2XXTqhUVaWZFry8FBZQi98svg495nY8+KQk4dgx48009wyv79wc2bKD0EzqnenAZVvSR8NJLwKef0vb+/RRSd+aMtzIxzmG2ngHg3XeNdAjm0d0nnwAXXuhOxTGzoj9yhPLjL1/ufL9uk55OE89JSUCPHsDHH1NhdCYqWNFHwvnnkyUPUGWfzp2B48e9lYlxDquiz8mhML/i4kCL/tAh8pW78dA3L5jS2Xe9fDktkiosBJo1A26/nSa/mahgRR8Js2cDX31F2+w31B+zUjW/+nxUXvCaa4Lbneaii4Dnnye/vM4ujYULgX/+k77bwYM0es7P91qqMg+HV0bCq6+S3/bqq1nRxwPmvDLm16Ii4KGHjPe5aVm3bm1E/pw65V6/bmN2Uc2ZQ7V7t241IqCYUsGKPhLMflmdh80MkZFB5SLr1qV9dc59PlI6VavSe8ztTnPsGHD0KMmks0VvfsiyUWUb7LqJBOuCKYAvPp0pV45cJVlZtN+gAdCtGy2KatyY8h0BFEffrRu932nGjiU5/viDJiy3bwfuvNP5ft2mqIjCVRMS+F6zEVb0kWBNU/zFF1SlntGT48dp+b0qAN+vH7BgAcWvS2kooN69qb16dedlsi4katCAVufqhrXIj2pjooIVfSSYLfp69ag6fWqqtzIxznHwIHDvvcCyZYHt1mgcNzFPEBcVAf/6l3uZM91k5Ehg2zbaZkVvG6zoI2HKFEpsBlCo3ezZHF6pM1aF/tlnNBmoCn0oBfTll9SuFJOTWCeIH3sM+O475/t1m8qVKUMnQOkl/vc/o4wjU2pY0UdCdjblOwGAn34CevWiSTlGT6yK/uRJ8omraBfVfvo0tbsR/qdkSkjQOyBg1izglVdou2ZNoG9fmvxmooIVfSRMmEBWPKB3xANDmP3hQGCY5ZgxVDjc3O6Gwu3ZE3jjDZoQFoIUvo7X4FdfGTV6jxyh++7wYW9l0gBW9JHwwgu03B3Q25piCKtFr14TEqispIpnd/Ohf8kllPZAoWs9VfN82Jo1NHr+9VdvZdIAVvSRYI66YYtef5o1o8RaKg2wOvcFBcDq1YaF6eZDf9++wCRmulY646gbR2BFHwlmK4Mtev1JSaHEWip8sV49Kg5+8iTQpg3lOwLIh9ynDxUDcZpXXgE6dDD2d+8GnnnG+X7dhhcnOgKvjI0Es0X/5z8D8+ZRPnBGT/buBSZNAm68kSbhO3emv9276bi6Fi66CJg+3R2ZrFWWMjPd6ddt2KJ3BLboI8F88WVkUPrUjAxvZWKcY+tWSqy1ZUtgu9dx9Ekmu+yFFyj0UDc+/BBYtYq2WdHbBiv6SFi1im4sAMjLAz7/nHymjJ5YFfq8ebT6deVK2lcK6KefKE3CDz84L5PVon/rLUr6pRspKcZixIYNgfnzgU6dvJVJA1jRR0Lt2sZQeds24KabKOkVoyfWNMVFRZR73hpHX1xMxcLdiKM3jyqVbDpaumPHAq+/TtuVK1Moq8o5xJQaVvSR8NJLwDff0DZH3ehPuDTF1atTxSM1KermZOGgQUZ8uepbR0X/xRdUrxegye/PP6dFaUxURK3ohRCJQojVQoiv/PvZQohlQogtQojJQojk6MX0mGefpeE7wJEA8UC4OPpKlajikVqS72aa4k6daCSpSErS09gwj1wOHaLvrGOqB5exw6IfAsBcpfhlAKOklI0A5AEYZEMf3sKRAPHF5ZdT5I2qE6sU+rFjwKJFwXH0bijcDRsCFw7patHzveYIUSl6IUQdAL0BjPXvCwDdAPgDjTEBQN9o+ogJQuWj19GaYoiUFKBWLSDZPxitWRO47TZakn/ZZcCPP1J7ZiZw661GEi4neewxkkHx++/AxInO9+s2rOgdIdo4+tcBPAxAJcbOBHBUSqnOzB4ALtwFDlJcTH/KeqtTh6ItGjXyVi7GOdauJd/wfffRRGDTppQCY+lSOq4UUP36wKefuiOTNeomuex7REPi8wHly9M2K3rbKLVFL4S4GsBBKeXKUv7/YCHECiHEitzc3NKK4TzWCIyUFMo7Uq2adzIxzvL777Tq1JpMy8s4emvUzSuvGKmzdWLJEmDuXNpmRW8b0bhuOgK4VgixA8AkkMu+T15lAAAgAElEQVTmDQDpQgh1RdYBsDfUP0sp35dStpVStq3uRoWe0pKURLHz//d/tH/6NC3qWLfOW7kY57A+3FevpupSM2fSvlL027ZRu4oScVoms6KfOpXy4etIgl8tpabSKOqWW7yVRwNKreillI9KKetIKRsA6A/gWynlrQAWArjR/7YBAGZELaWXCEE1OitUoP0TJyjUbeFCb+VinMMaXikExcqfPh26vbDQHZnMIwldo26efhp4913aTkwE2rcH/vQnT0XSASfi6B8B8H9CiC0gn/04B/pwj9OngeHDjbJtPJzUn3Dhlc2bU9qBli1p382J+aeeAp5/3tjXNepmyhTg229pW8rAlAhMqbElqZmU8jsA3/m3twG42I7PjQlOnaKFKvXqUSwzR93oTzhFX7UqVTxSuLmmonPnwH1dLXrryGXQIHrItWnjnUwawCtjz0W4akM6WlMMceedVBO4Vi3aV+d+3z7g668pnh5wN45+6dJAyzYlxfBl64R5LkIIfUcuLqPhlWIzVn8tW/T6U64c5VlRirRqVeDee2mlZu/eRr3gSpWovXlz52V64AHg8ceN/dmz9ZwnipecPi7D+ejPhTUCIzmZSpzVqOGdTIyzLFoEzJgBPPccULEi5bh55x3KwwIYD/3UVGp3A2vUja5UqEC/uYIVvS2wRX8uQkVgtGjBcfQ6s2IF8NprwJkztC8lXQcqukYpXNXuxujO6rsePZpWy+rGhg1UgF3Bit4WWNGfi+xsutBuv91oe/NNWtjB6Il1XubAAXLnmMP+AAqtLFcO+Pe/nZfJatF//70R168zS5cCjz7qtRRlHlb050JNCJknvv75T+Crr7yTiXGWcFE3BQX0ap2Y98Ki19XSHTiQ0k0omjZlN6kNsKI/F3v20ITb6tVGm643GUOEU/R/+QtVPFLROG4q+nHjAi1bXcMrp0wBfvvN2B83zkgRzpQaVvTnIjcXeO89YOdOo41DvvSmuJheQxUe6d7dmCxUozy38tFfeKGxr+s1aB25PPss8Nln3smjCazoz4U16kZt63iTMcSTT5KyV4pcnfsNG4DJk41UCG7GeX/1VWAcfXo6kJbmfL9uw+GVjsCK/lxYo24AfYfNjIEQxnb58pQG4+hRoH9/WkylGD48eNWqE9xzjzEZDFDUjdmdqANS0gOWFb3tsKI/F6Es+pUrgRde8EYexnmmTQPuv9/YL1eO0gJ36kT75of+iy8CV13lvExWS1dHfD6a/zCPVFjR2wIr+nMhJS3iKFfOaKtbl1ZLMnqydCkwfryxLyWlqv7jD9o3K/pjx6iItdNYwyvHjg0M+dWBpCQq4fjQQ4FtrOijRnMTwQY6daLEZmbGjKHycX3LfpVEJgTWCUEAyMiglAdAoMJt0IAU7ujRzstk7nfNGn3z0ZuZM0ffalouwhZ9aRg1igo/MHri8wUqejXpqlbGejFfY1X0Okbd/PEH5RIyLwSrWZNq8zJRwYr+XKxeTUWZt20z2nS8yRgDq6IHaP+vfyW3TkpKYLsb18LChVTDVqFjQEB+PmUH3bXLaJswIXABFVMqWNGfi927gYkTyUer0PEmYwySk4EqVQLbEhOpUHj79oGrpBMT3bkW2rcnN5G5X92MDWvqCYAWTH30kTfyaAQr+nPBcfTxx6hRgSM4gM75smWkeKztTit6KUnZ/fqr0VatGlC/vrP9uk0oRc/3mi3oqegnTya/6oED0X9WqItPR2uKOTtPPEGuhcGDA9v/+U/guuuc7dvnA+66K9B3/X//B2zZ4my/bsOK3jH0jLpROcLXr4++sHCoBVMLF+of0xzPjBkDbN4MvP660TZ8OHDkCPDLL4HvfeAB5+UJNarUkYQEoEkTWvWrYEVvC3pa9B070usll0T/WcnJlOPEHOKVlmaE2jH2sWMHlevzmiVLgFmzAtv27gVycoInaQ8coHxIThLK0p0yhfLu5Oc727eb1KtHaSbMYcus6G1BTxOhsJAWOVWoEP1n3XAD/Zl57z3ym957b/SfzxhkZ1PCMDcWIJ2NUFE37doB+/dTVSkz3buTFTptmnPyhBpV7t4NLFhAxVHMUUC68d//ei2BFuhp0RcVUeIpcxIoO5k0iTPq2Y2yiq2L07wgXHglEOw+cSPqJpTrRsci9Zs3A126AD/8YLSlpgY/XJkSo6eiHzaMXleujP6z5swB+vQh/6yCh5P28+OP9Prww97KAYSuz5qUBFx5JcXRW9udVvRVqgC//05x/OZ+lay6cOwYKfmjR422SZOAl1/2TiZN0FPRqxhoc5bB0rJtG0U7mBU7R93Yz/79lGbgmWe8loTmYLKyAtsSE2mFZpMmwe1OXwtJSUDLloF1ipWi1+k6DDVy+fprcpUyUaGnov/HP+j12LHoPytcyJdOllQscO+9VFnIGtXiBePHUyUpM4mJNLobOza43elr4fhxyqWzfr3RVr060KpVYDrlsg6HVzqGnop+2TJ6tcOit5aVU9t88dnPe+8Bl15qVHiKJUaOpAymzz4b2D50KPD3vzvb9+HDwJAhwPLlRtsNN9ACKp3qqfKaFcfQU9ErH58dij7UxTdtmj3+/3jijTeC/duKJUuANm2oPq+URjpgrxg5EnjkkcC2AQOAK64InqS9+WbnF0yFirrRkYoVgYsu4nz0DhBbin71antSrx47Ru4b6zC7NKSlAQ0bBg8ndb/p7GboUKBDh9DHFi2ic9+0Ke3b4XKLhh9/DH4obdtGf9ZJ2p07g9Ml2E0o3/X8+ZT/xlzLuKzTrh2wYgU99BWs6G0hthT9Sy8B114b3Wfk5wMFBVSpJsGGrzd4MLB1K5WTU4wfT0vimcg41426eDHQrBnF0QPeK/pQ4ZU33UQjD2v7PfcAt97qrDyhLPq8PHLleL3mwGlefx04dMhrKco8saXo69cnhSpl6T+joICKhSxcSGXenODbb4FPP3Xms3UkKYl87926BR8rLiYLulMnY8gei4pe7Vvb3bA44yWOfuFCsuY3bjTaEhPtMdjinNj6BTdsIEUdzaKZtDSKxU1PBz7+OHqZxo0DevQIbOOom5JTt27olcrr1tGcSqdOwJ//TEv7zz/fffnMhKrPmpQEtG4NfPNNYLsbUTfNmlF6iCuuCJQH0EvRHzlCLjxV4AWg0GZz/V6mVJRa0Qsh6gohFgoh1gkh1gohhvjbM4QQ84UQm/2vkRdXVf558+Kk0lKlij2TsVu2kA/ZDEcClIyff6awyccfDz4mJdCvH9C5M8Wu9+tHoYNeUrcu/ZlJTCQjombNwHY3HvrJyTTaNedX0nHBVKjAh59/NpIUMqUmGou+CMAwKWVzAJcAuF8I0RzACAALpJSNASzw758bs7vm8OHSS7VoEdC8OU1S2RV1E2q4rtMN5jQHDtBwPNQQ/IILyIrPzqa8Ld984/zk5rmYNg14//3AtsRE4LvvgA8/DG53+qG/ezfwwgvA9u1GW2YmJe+rWNHZvt0kXBy9lLEZcluGKLWil1Lul1Ku8m+fALAeQG0AfQBM8L9tAoDIKmib3TVVIx8EBJGTQwtL0tMpTC9ahRxqOXxKCsVUM5Ghwl1vuSX4fPzxh/GQLywkN9nnn7srXyQMH06vH3wQ2H7ffcBzzznb9/btNBoyK/pLLqFJ7BYtnO3bTUJNOus4cvEAW3z0QogGAFoDWAaghpRyv//QAQAhV3QIIQYLIVYIIVbk5uYaM+tjx0ZXOUcplXr17MmEGMpfO2oUxXwzkaHKMG7bFpjHBKBzPWQIbVesSDe515Oxt98enIqhVy+aTLaO7rp1A66/3ll54iWOPisL6No1cJSi41yEB0St6IUQqQCmARgqpQzwlUgpJYCQITRSyvellG2llG2rV69uuGvKl49uwYxSEk8/TUreWvuzpNSqBVx4YXSfEe+Y6+2at4uKaD4mM5P2hSA/uNeKftkyCgwws3EjTRRale3Wrc5lSVWEirpZtYqseZUMTgd69qTIG3OxoPLlSfGzoo+KqBS9EKIcSMlPlFJ+4W/OEULU9B+vCeBgRB923nnA//5H1tQbb5ReqKNHyRdsV2rTESPo4jMzZQpw5532fH480KCBoaTMil5NupuTdaWl2TO3Eg2hwiuHDyfZraO7558PLJThBKF81wUFFLHk9W/lNEOHksFWubLXkpRpoom6EQDGAVgvpXzNdGgmgAH+7QEAZkT0gWlpdMNUqhTdZGzDhpRWeMMGemiYE0HZxa+/AhMn2v+5ujJgAE1kAoERVcpdZ1X0Xlv04dIUA+5NzM+fbyR4ixff9fjxQOPGwe49Jmqiseg7ArgdQDchxC/+v14AXgLQQwixGUB3//65WbcOmD6dbvRoFP1ddwFffEEXy6efRr9E/Omngy02FWkRzcKueCMri1IcmCNvQin6t98mK9lLQkVaJSbSvM8XXwS3O+FWuOIKitsHKA/+oUOBLkQdF0wdOULhzOZrZOFCWnlsHgkyJabUpQSllIsBhMuRenmJP3DKFMoMeMEF0Sl6hV056bdsAdasCWxT1lRxsf4TZHbQqxc9wK2jq1q1KJrEvEAqXD4cN7ngAiMdgyIxkaKtrC5BNxZMJScb8xgKHS36UC6qbduonOBLL0UXjRfnxM7K2EOH6ERmZUWn6K++msL47FL04VZJAnrdZE6yc2fgakdFo0YUmmhenLR6Nc3VeMns2cGLuxITgU2bgldbO6HorZ/3yy/Ao48GFiFPSyNL3+vFZXYSLk2x+RhTKmJH0R8+TFbL3XdHV3R71y6qF6vyptih6K1We+XK9EDiiy8y8vLoIX7VVcB//mO0HzkSnLDqww+BQYPclS8SHnyQXmdYppwGDgQ++sjevhITyUc/wb8cZe1asmjNvuv69akQSufO9vbtJeEWTJmPMaUidhT9oUPkq735Zpq8Ky1Hj9JiqdRU+rxoEyKFmph74AFamKXTqkQnUYp+zRpSWornngt2kaioGy/nPzp3DnwgAZQSuEmT4GuhdWv7o25On6a8P7fdRvuhFKCONG5MI3LzPcujZ1uILUWfmUkx9Bs2lH7J87FjpCwSEmioO3RodHI1bUrFEJjScfo0pY6uWpX+zJNqhw4Fux6qVKGb2sv0u6tXA/v2BbatX0+x9NbR3ZYtwYnOomXuXPpdxo6l3yFU1M3evTQ5PGmSvX17yS23BNejqFhRrypaHhE7iv6//wVeeYUu7mbNShdi5fORNZiebp9cL74YnN9k9mwK4fQ6DLAsUFREaw5atw6t6M0RN0BspCoONYobPZperYp+/HhySdnJQf/Sk7/9Dfj999ALpqSkHDgnTtjbd6xx7bWUK0kVpWFKRewo+mbN6E9FF5RmQvbMGYqdVxVqhg51Jg/Jzp2UPvX0afs/WzcqVyYfds+epOjNcfS5ubGp6MOFV5pfze0+n72uppycwO14iaN/6imaoGdsJzYUvZQUP71+vaHoS5OqOCWFoiL69KH9Zcso8VM03HEHPTzMcCRA5BQXG0qwdWvKLKoIZdH36EHl5Ky+ezcJV3ikShWqT2DGHGprFwcPBm7fey+thM3KCpQH0OsaPHo02MBbvZqseicWPsYRsaHoz5yh4gJLlgAZGdRWGovealVVqRK9ZbhjR3ACMx2tKaeYNYvylaxaRYvPzD7lJ58MfohmZtKcSKgiJW4gJXDZZbTC2kxiYuh1E04o3Jwc40GXk0PzTcnJlAtIoWM0SqhQ5sOHyW9vx9qaOCY2FL26WDMzo3PdLFpEVv0PP9C+HcVHQvlrdbSmnCIvjx7koXKV3HVXcPWu48cpF7w1qZhbCEGrMQcODGxPSqJAgcmTA9vVtWDnQ//WWymOPz2dLPp586jYvXktQvnywI036uXqONuaFb7XoiK2FH21arRacswYCmcrKceO0RBXhT3akSArlL82LY2SsHEty3OjJl8zMijPfOPG5Js/fZoWAlkzlR4/TpOQ6mEdK9x1F70uWRLY3q8fTc4nJ9vXV58+1N+4cfS6ciXdE2b3UMWK9HtefbV9/XoNK3rHiA1NZbboK1WiYg6lqRuqInVU1E39+kCdOtHJFsqi79OHwuq89COXFZSiT0+n87xlC43WNm4kn/38+YHv93oy9vRpspKtBUaaNqVRifWh37AhRd3YGeO+Zg1dy9dfT/H0oaJudKR9e+C66wLbWNHbQmwpejUxt3YtLTcvKUo5KGXxxBPA0qXRyXbppUC7dtF9RjyTl0cutMREI1dJXp6xnN86GZuaSiMlrxR9URHlmLf2v2EDhTJaFf22bVR6MD/fnv4LCynXzujR9DD85pvQUTc+H/2eL79sT7+xwODBRhiromJFGj2npHgjkybEhqKvVo1uJOWfv+46mqgrKcqiV4reDkaPprAvMz/9RJWFSvMwijc6daKJdsBQ9ObUB1ZFL4Q9k+ilJdwqVFXeUFjy+M2dS75yu1LrqoibGjXo2rv5ZlLqCQmBfSckUJ/mEpw60qoVjQK7dvVakjJNbCj6hARaXq4slszM0k3GtmlDSkXVc507lwoo799/9v8rKUeO0IQdL5g6N/36Af/6F22riKq8PEPRh0rK5WVOeuUmCRddE25i3q7JWLOir1GDrjWfLzjdhhAki04ujb/+lUfPDhEbij4vj1YYKkqr6Hv3Bt56y9g/fpxKrZUmJl/RqRNFPJjhqJvIOX7cUILVq1PK4qwsUvRChE49++23wblm3OJciv6JJwLb7Q61VYulsrKMuPl//CP0ClincuF7xalTFKFlZtcuGj0vWOCNTJoQG4o+Nxd4911jv7SK/tSpwMgEO1IV79sXbF1yHH3kdOgA3HQTbaenU1z9FVcAN9xA2RlD5fNv2DDYpeMW5coB11wTOh89EHzO7bbolaJXFr25zYpT1a28IlTUTUEBjZ4PHPBGJk2IDUXv8wXe2JmZpbPC+/QhC1xhh6I/W1k5nawpp1CZK620ahW8WErx5ZcUS+8FGRmU3qJnz8B2tRjPmivf7muhY0eK+KlVy1D0//kPRaJZGTAAaNvWnn5jAQ6vdIzYUPRFRYEVdAYMoDKAJc0fojJXKuxQ9KHi6KtUobJuXq3eLEtYFX2HDrSkf9kyKh8ZikmTYi+aRIX97d4d2N6jB6XZiDaMV9G4MdVkqFABaNmSwk+Li43JYDNjxgD9+9vTbyzAit4xYkfRmy36Cy8k69wa4XAuVC56RUYGTe5UqhSdbNaLr00bWuxTmkVd8YQ5RbEiP5/cYffcA4wcGfr/vJyM3b6dLOmpUwPblXVdvnxge1YWWeF2PfR//ZUyVgJkUHTvboSn6k7v3sG5/VnR20JsrMAoLg606A8fBpYvBy65pGR1Io8eDbToa9akz4mGPn04EqC0qMVS5nOoUhUfOhT+QWkuPlLSh320FBZS5It1UlClZCgoCGzfvZsmCq+91ogqiobhw+kht2wZ7c+YQak9Qi2WqlePFmt55eaym4ceCm4rX54WjXG92KiIDYu+dWujVBtA1nKvXsBvv0X+GVLSDWJnLnqAbiJrabuNG4GLLwa++87evnQjJQV45plAha5SFYfKXKlISyNFa9cipJIQanESQJFAQPBIY/VqyouzY4c9/efkBBbaGDKEMjeGWxWrk6UbKt1zRgb9xmpCnykVsaHoExJoRaRCWUYlmZAtLqYCyt27B7Z37mzEcdtFQQHw88/RhW3GAxkZtPBN1QdQbTt3kiI/m6IHvHHfhAuvVHmNrC4aJ+LozYpebYf6rXQLr2zfXq/cPTFEbCj6PXsCJ7lKk8EyMZHS4FoV/fbttEy9tGRlBa+MZb9hZJw4QSXvzEqwSxcjMiqcor/jDjr35vzrbhFO0SuFe8cdge12rqkoLqZQY/P3zsoi18WKFcHvj5fwynbtAtfZMCUmNhR9Tk6g9VYaRV9QQCtgrb7VaFMVqzS7ZnjBVGRMn07RKNu3G2233065YebMCX4oKypVIsvfi+yg6em0QrNu3cD2cGsn7FxToVbBWi16cyESM7pZ9KEUfUICPeT27vVGJk2IDUUPBFp3FSvSJExJFP3q1RR7bM2GGG3elLOFfOlkTTlBqMlYgM7vFVcAtWuH/r89e4ARI7ypKpSdDUycGFwQXqVssF5fdrpuUlMp5fE11xhtWVkUpRRqonLAAPvr1XoJ135wjNhR9OaIBSGAr7+meOtIsWauVERj0atVttaLr2JFcj94tXqzrKDmMMwT5NOnk5X2/PPhJ1vz8iiOfu1a52WMlEsvpVdreGW7dhQSaX0wlIaUFFLc5lW5999PsfWhJv4ffTS4QEpZJpxFLwQr+iiJDUWvSqWZ6dYtuJzb2VCK3hp106FD4GRgSQgXgVGzJhXGsK6eZAIxpyhWqDUNTz4ZWDHJjFro5sVk7OLFJOP33we2qwpZ1jUZqam0ytccTFBatm6lB6G56Hzt2pQfP1TUTUGBN5FJTjFoUOjJWN2St3lAbMTRh7qIFy+mG71378g+Q03mWhX9M8+UXi4haJVi69al/4x4Qo2clKIOlf7AvB+qvCDgbdRNYWHo1L9bt9Kr1Z2Yk0OrVnv3jr4QzVdfAUOH0oSsiu7JySF3Tqgsn1260O85Z050/XrJggW0QLJaNXLXhaJLFyoixJSa2LDoW7YMbhs1Cnjkkcj+//Rp4N//pvCsWrXsk6tcOco70qtXYPuRI0CzZpSmgTF45BEqmnHyJO3fdhtFQpmxuuhCoR4AsRReqdZ0WFMg7N4NPPAAVYWKloMHqV/zb6TmBlShFjPhom7mzaORQawzcyZNyHfvTvHzR4+GHqF8803J3LhMELGh6EPd8DVrUmGPkSPPPSlboQJFcowbF/xZo0eT8jdntYwUKcPn29mwgSvTm/npJ+C99ygr5c8/A//8J0243nln4PsiWeGYmEjKPlRqXqcJp+hVhSNrXng7J+ZzcshyN0cbnS3ENFTUzY8/UmSTGlXFKgcP0mgZAN5+m+7bRo2AYcO8lUtTHFH0QoirhBAbhRBbhBBhxmPn4LHHqGbmSy/RkPi//w39PuXP7NgRaNEi+PiZMxR2aS1CHQmHDtFNN2ZMYDtH3QRy5gwV9K5dG3j2WVL6//kPnTtr0ZdIq38dPOhNTvpw9VkvuIBe//KXwHY7o0Ksq2IBCjVOSKD7wYrVot+6lVJ2qKR7J08GFzOPBaQkJX/8OOX1URPdx4+HduN27Bi8loUpEbb76IUQiQDGAOgBYA+An4UQM6WUYVIVhqFmTcpi+PjjpDwaNaL2fftIaVeqRDfXVVdRmFk4/56ybHbsoOiFChXo/44eNSx29ZeWRsfPnCG3gRo2W607dTGaLfpjx2jUICX5pnNyyApUE8GHD9Mwf8UKYOVK+oxrriGrt7iY+lKjDiHoLzWVLEifj/4/MZFu+tOnSYHWrg386U80Kbdnj6EU1KgmJYXcT2fOGP5z9dkJCfQbJiXR/x8+TJ+5dy/JUr06RRZVrUrHlTsmIYH+EhPpt0pIAF57jW7Y6dNJ5mHDKETx0UfJ5WF+SCck0GjoXIoxJcVw3SiZpaTfIymJvtPp09RWXGz89unpgceLi2n7xAn6a9qUImdOngzti69dm2qXWi3pSPPR5+eHnmRW1+GpU8H5coQgua2rYtXvVb166Jz0SUmGq2PnTponKC6mnP+ZmfQ9Pv2UKq21aEGypqTQ9y8upvvI/NslJBihzT6f8fuo37iwkM555cp0/yxeTPl2atemYAoh6POTkuh+++47un7q1qX3lC9P18eGDSTTSy8ZbtsnnqDzFOq62L6dfPkbNpBb6513qP3nn404e7WgbOBAenCcOUPfLzHRuHYAkr9cOern1CmjXd0LuiKltPUPQAcAc037jwJ49Gz/c9FFF8mIGTTIqp6lnDcv/PunTTPe99Zb1PbLL8GfAUg5YQId/+GHwPaPPgr8zMJCai9f3mhr3jz483r2NI7XqmW0p6VJmZoq5YgRdOzgwdDyvPgiHd+2LfTxMWPo+IoVoY9PmkTHFywIfXzWLDr+v/+FPv7TT3T8/fdDH9+wgY4/9ZSUffsG/kaLF9N7brvtbGczPG+9JWV6enCf8+fT8cmTQ8u0dCkdHzs29PF16+j4qFGhj+/ZE1qe6dPp+KefBrZv3RrY74MPBn9mYqLx/oEDg4+np9Ox9eulXLMmuG9Ayqys4Pbx46V8913a7tRJynLlpPz+e+P4gQNSNmgQ2Nczz9Cx3btDf//XXqPj69aFPq7uhYULQx//8ks6Hu6aWr6cjm/aJKXPZ8g6Zw4dHzAg+Hs2bEjHqlWT8uGHqS0nR8qEBONzMzLob+JEOv7dd6H7nzmTjn/5ZehrvYwBYIU8i25Vf048wmoDMM9Y7QFgXz7fkSMpq6X0P4nPO49CMcPRsyf57k+dotl7gKyLN9+kbWXdCkGfC1BY55tvUh/JyeRCMlOuHEVImMPgRoww4sbT0sgyM0dhPPkkWXXt2pHMUhrWWGoquYeUJa4uPyVPZiaVSFTWV3IyjXhU7HbdusBHH1H/6n8BGr4DNJIZPZq2lfUmJU0oAxQe+M47NDqoXZsiIA4eNKytDh2A1183ZPP5AovF9OsXvKCnY0eaRAvlTouESy+l38x8OwpB30V9N+XaUSMdwIjOaN+ejickkKVWuTL9zmqy/rLL6DdRowVFONfStdfSPJA1jW52Ni3PVyO3vn0NGZTM5s/v359+b3Obis1v2jR037/9ZswRmBkwwNgeNoy+28UXG201alAY8IwZZMH6fHQu1fd89dXA305Kyg0F0Ijm1VepTd0f5coZCeratQOWLqVSf3v3Gpa4uqa6d6ei3nl5NNrcu5esbLXiWJ1HxZVXUvSQcpGZ+fhjss67dTPqQaelURGYggIqvtKgAbWrkdV551FAh7pn1Hlo3tyQ89VXjXbNo3qEVErBrg8U4kYAV0kp7/bv3w6gvZTyH5b3DQYw2L/bEoANYQtlimoADnkthMvwd5iQFl0AAAQhSURBVI4P4u07e/l960spQ8TeBuKERb8XgDlRSB1/WwBSyvcBvA8AQogVUkqNaqKdG/7O8QF/Z/0pC9/XiaibnwE0FkJkCyGSAfQHMNOBfhiGYZgIsN2il1IWCSH+AWAugEQAH0opYyhpCcMwTHzhSDyRlPJrAF+X4F80qYVWIvg7xwf8nfUn5r+v7ZOxDMMwTGwRGykQGIZhGMfwXNHbki4hhhFC1BVCLBRCrBNCrBVCDPG3Zwgh5gshNvtftStzL4RIFEKsFkJ85d/PFkIs85/ryf7Jem0QQqQLIaYKITYIIdYLITrofp6FEA/5r+s1QojPhBApup1nIcSHQoiDQog1praQ51UQo/3f/TchRClzpNuLp4relC6hJ4DmAG4RQjT3UiYHKAIwTErZHMAlAO73f8cRABZIKRsDWODf140hAMxlol4GMEpK2QhAHoBBnkjlHG8AmCOlbArgQtB31/Y8CyFqA3gQQFspZUtQ8EV/6HeexwOwlvIKd157Amjs/xsM4B2XZDwrXlv0FwPYIqXcJqUsBDAJQB+PZbIVKeV+KeUq//YJ0M1fG/Q9J/jfNgFA39CfUDYRQtQB0BvAWP++ANANwFT/W7T6zkKINABdAIwDAClloZTyKDQ/z6CAjgpCiCQAFQHsh2bnWUq5CMARS3O489oHwMf+DAVLAaQLIWq6I2l4vFb0odIlhCkkWvYRQjQA0BrAMgA1pJQqteMBADXC/FtZ5XUADwNQ+aEzARyVUqqsVbqd62wAuQA+8rurxgohKkHj8yyl3AvgVQC7QAr+GICV0Ps8K8Kd15jUaV4r+rhBCJEKYBqAoVLKgCK2/uRE2oQ/CSGuBnBQSrnSa1lcJAlAGwDvSClbAzgJi5tGw/NcFWTBZgOoBaASgl0c2lMWzqvXij6idAllHSFEOZCSnyil/MLfnKOGdP7Xg17J5wAdAVwrhNgBcsd1A/mv0/1DfEC/c70HwB4p5TL//lSQ4tf5PHcHsF1KmSulPAPgC9C51/k8K8Kd15jUaV4reu3TJfh90+MArJdSvmY6NBOASj84AMAMt2VzCinlo1LKOlLKBqBz+q2U8lYACwHc6H+bbt/5AIDdQogm/qbLAayDxucZ5LK5RAhR0X+dq++s7Xk2Ee68zgRwhz/65hIAx0wuHu+IJJexk38AegHYBGArgMe8lseB79cJNKz7DcAv/r9eIJ/1AgCbAXwDIMNrWR36/l0BfOXfbghgOYAtAD4HUN5r+Wz+rn8GsMJ/rqcDqKr7eQbwDIANoOyznwAor9t5BvAZaA7iDGjkNijceQUgQJGEWwH8DopI8vw78MpYhmEYzfHadcMwDMM4DCt6hmEYzWFFzzAMozms6BmGYTSHFT3DMIzmsKJnGIbRHFb0DMMwmsOKnmEYRnP+H72KpXOyl7WoAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "## Plot loss and accuracy por more episodes in order to see if the reward increases \n", "## The reward seems to be the same but the loss function gets smaller \n", "\n", "episodes = list(range(0, 96))\n", "\n", "loss_Sarsa_Bolzman = [2.938,23.077,2.101,1.326,0.585,0.585,3.030,28.383,0.573,\n", " 0.566,0.571,0.579,0.571,0.566,109.56,0.5706,0.5656,0.564,\n", " 0.704,0.556,0.868,0.568,0.579,0.571,0.574,0.561,0.713,0.559,\n", " 0.553,0.568,0.567,0.557,0.573,0.735,0.562,0.566,0.550,0.565,\n", " 3.034,0.565,23.642,0.869,20.407,27.008,0.562,0.555,0.561,0.558,\n", " 0.570,0.553,0.565,0.641,0.568, 0.572,0.571,121.95,0.559,118.69,\n", " 0.556,0.558,0.556,0.560,0.562,0.565,0.567,18.003,0.565,109.29,\n", " 0.561,0.572,2.371,0.573,0.555,0.556,0.569,0.557,0.555,0.563, 0.740,\n", " 0.556,0.553,0.734,0.562,0.559,2.77,0.557,101.28,0.554,1.088,0.568,\n", " 0.880,0.570,0.555,0.559,0.557,0.561]\n", "\n", "plt.plot(episodes, loss_Sarsa_Bolzman, 'r--')\n", "plt.axis([0, 110, 0, 110])\n", "plt.show()\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Conclusion State-Action-Reward agent\n", "\n", "#### The State-action-Reward agent with Neural Network model and Boltzmann Gumbel Q Policy\n", "#### No change in comparassion with the other model " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "flatten_1 (Flatten) (None, 100800) 0 \n", "_________________________________________________________________\n", "dense_1 (Dense) (None, 300) 30240300 \n", "_________________________________________________________________\n", "dense_2 (Dense) (None, 9) 2709 \n", "_________________________________________________________________\n", "dense_3 (Dense) (None, 600) 6000 \n", "_________________________________________________________________\n", "dense_4 (Dense) (None, 9) 5409 \n", "_________________________________________________________________\n", "dense_5 (Dense) (None, 600) 6000 \n", "_________________________________________________________________\n", "dense_6 (Dense) (None, 9) 5409 \n", "_________________________________________________________________\n", "dense_7 (Dense) (None, 600) 6000 \n", "_________________________________________________________________\n", "dense_8 (Dense) (None, 9) 5409 \n", "_________________________________________________________________\n", "dense_9 (Dense) (None, 300) 3000 \n", "_________________________________________________________________\n", "dense_10 (Dense) (None, 9) 2709 \n", "_________________________________________________________________\n", "dense_11 (Dense) (None, 300) 3000 \n", "_________________________________________________________________\n", "dense_12 (Dense) (None, 9) 2709 \n", "_________________________________________________________________\n", "activation_1 (Activation) (None, 9) 0 \n", "=================================================================\n", "Total params: 30,288,654\n", "Trainable params: 30,288,654\n", "Non-trainable params: 0\n", "_________________________________________________________________\n", "None\n" ] } ], "source": [ "# Next, we build a neural network model\n", "model2 = Sequential()\n", "model2.add(Flatten(input_shape=(1,) + env.observation_space.shape))\n", "model2.add(Dense(300, activation= 'tanh')) # layer 1: 3 cells with tanh activation function \n", "model2.add(Dense(nb_actions))\n", "model2.add(Dense(600, activation= 'sigmoid')) #layer 2 : 6 cells with sigmoid activation function \n", "model2.add(Dense(nb_actions))\n", "model2.add(Dense(600, activation= 'sigmoid')) #layer 3 : 6 cells with sigmoid activation function \n", "model2.add(Dense(nb_actions))\n", "model2.add(Dense(600, activation= 'sigmoid')) #layer 4 : 6 cells with sigmoid activation function \n", "model2.add(Dense(nb_actions))\n", "model2.add(Dense(300, activation= 'tanh')) #layer 5 : 3 cells with tanh activation function \n", "model2.add(Dense(nb_actions))\n", "model2.add(Dense(300, activation= 'sigmoid')) #layer 6 : 6 cells with sigmoid activation function \n", "model2.add(Dense(nb_actions))\n", "model2.add(Activation('softmax')) # one layer of 1 unit with sigmoid activation function\n", "print(model2.summary())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Q-Learning\n", "\n", "#### Linnear annealining is a technique for aproximating the global optimum of a given function\n", "#### https://en.wikipedia.org/wiki/Simulated_annealing" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "#DQN -- Deep Reinforcement Learning \n", "#Configure and compile the agent. \n", "\n", "# Select a policy. We use eps-greedy action selection, which means that a random action is selected\n", "# with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that\n", "# the agent initially explores the environment (high eps) and then gradually sticks to what it knows\n", "# (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05\n", "# so that the agent still performs some random actions. This ensures that the agent cannot get stuck.\n", "policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,\n", " nb_steps=1000000)\n", "\n", "\n", "memory = SequentialMemory(limit=1000000, window_length=1)\n", "dqn2 = DQNAgent(model=model2, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50,\n", " target_model_update=1e-2, policy=policy)\n", "dqn2.compile(Adam(lr=1e-3), metrics=['mae', 'acc'])\n", "\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "import os.path\n", "file_path = 'dqn_backup_weights.h5f'\n", "if os.path.exists(file_path):\n", " dqn2.load_weights(file_path)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "from rl.callbacks import Callback\n", "\n", "class Saver(Callback):\n", " def on_episode_end(self, episode, logs={}):\n", " print('episode callback')\n", " if episode % 1 == 0:\n", " self.model.save_weights('dqn_backup_weights.h5f', overwrite=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training for 100000 steps ...\n", "episode callback\n", " 658/100000: episode: 1, duration: 336.758s, episode steps: 658, steps per second: 2, episode reward: 290.000, mean reward: 0.441 [0.000, 10.000], mean action: 3.857 [0.000, 8.000], mean observation: 72.810 [0.000, 228.000], loss: 2.103166, mean_absolute_error: 0.148895, acc: 0.111409, mean_q: 0.205358, mean_eps: 0.999681\n", "episode callback\n", " 1086/100000: episode: 2, duration: 237.365s, episode steps: 428, steps per second: 2, episode reward: 70.000, mean reward: 0.164 [0.000, 10.000], mean action: 3.928 [0.000, 8.000], mean observation: 72.885 [0.000, 228.000], loss: 2.026317, mean_absolute_error: 0.149495, acc: 0.114559, mean_q: 0.213103, mean_eps: 0.999216\n", "episode callback\n", " 1718/100000: episode: 3, duration: 342.532s, episode steps: 632, steps per second: 2, episode reward: 190.000, mean reward: 0.301 [0.000, 10.000], mean action: 4.198 [0.000, 8.000], mean observation: 72.871 [0.000, 228.000], loss: 1.937029, mean_absolute_error: 0.150381, acc: 0.109573, mean_q: 0.250200, mean_eps: 0.998739\n", "episode callback\n", " 2308/100000: episode: 4, duration: 324.798s, episode steps: 590, steps per second: 2, episode reward: 150.000, mean reward: 0.254 [0.000, 10.000], mean action: 3.812 [0.000, 8.000], mean observation: 72.893 [0.000, 228.000], loss: 2.116763, mean_absolute_error: 0.149315, acc: 0.116102, mean_q: 0.224220, mean_eps: 0.998189\n", "episode callback\n", " 2978/100000: episode: 5, duration: 362.901s, episode steps: 670, steps per second: 2, episode reward: 320.000, mean reward: 0.478 [0.000, 10.000], mean action: 3.972 [0.000, 8.000], mean observation: 72.779 [0.000, 228.000], loss: 1.991054, mean_absolute_error: 0.156376, acc: 0.113666, mean_q: 0.301945, mean_eps: 0.997622\n", "episode callback\n", " 4026/100000: episode: 6, duration: 568.897s, episode steps: 1048, steps per second: 2, episode reward: 310.000, mean reward: 0.296 [0.000, 10.000], mean action: 4.072 [0.000, 8.000], mean observation: 72.807 [0.000, 228.000], loss: 1.831619, mean_absolute_error: 0.148026, acc: 0.114206, mean_q: 0.219604, mean_eps: 0.996849\n" ] } ], "source": [ "s = Saver()\n", "dqn2.fit(env, nb_steps=100000,callbacks=[s] , visualize=False, verbose=2)\n", "#dqn2.test(env, nb_episodes=10, visualize=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training for 1000000 steps ...\n", " 654/1000000: episode: 1, duration: 317.012s, episode steps: 654, steps per second: 2, episode reward: 120.000, mean reward: 0.183 [0.000, 10.000], mean action: 3.887 [0.000, 8.000], mean observation: 72.928 [0.000, 228.000], loss: 1.448611, mean_absolute_error: 0.186551, acc: 0.101472, mean_q: 0.763487, mean_eps: 0.999683\n", " 1336/1000000: episode: 2, duration: 352.722s, episode steps: 682, steps per second: 2, episode reward: 260.000, mean reward: 0.381 [0.000, 10.000], mean action: 3.708 [0.000, 8.000], mean observation: 72.801 [0.000, 228.000], loss: 1.389616, mean_absolute_error: 0.164051, acc: 0.099203, mean_q: 0.545559, mean_eps: 0.999105\n", " 2407/1000000: episode: 3, duration: 548.344s, episode steps: 1071, steps per second: 2, episode reward: 710.000, mean reward: 0.663 [0.000, 200.000], mean action: 4.073 [0.000, 8.000], mean observation: 72.658 [0.000, 228.000], loss: 10.424305, mean_absolute_error: 0.192876, acc: 0.112803, mean_q: 0.673903, mean_eps: 0.998316\n", " 3048/1000000: episode: 4, duration: 336.784s, episode steps: 641, steps per second: 2, episode reward: 220.000, mean reward: 0.343 [0.000, 10.000], mean action: 4.105 [0.000, 8.000], mean observation: 72.845 [0.000, 228.000], loss: 6.729588, mean_absolute_error: 0.240661, acc: 0.109887, mean_q: 0.999652, mean_eps: 0.997546\n", " 3740/1000000: episode: 5, duration: 361.328s, episode steps: 692, steps per second: 2, episode reward: 190.000, mean reward: 0.275 [0.000, 10.000], mean action: 3.990 [0.000, 8.000], mean observation: 72.896 [0.000, 228.000], loss: 6.518019, mean_absolute_error: 0.241126, acc: 0.111949, mean_q: 0.994125, mean_eps: 0.996946\n", " 4359/1000000: episode: 6, duration: 302.134s, episode steps: 619, steps per second: 2, episode reward: 230.000, mean reward: 0.372 [0.000, 10.000], mean action: 3.974 [0.000, 8.000], mean observation: 72.816 [0.000, 228.000], loss: 8.690007, mean_absolute_error: 0.241739, acc: 0.112126, mean_q: 0.999973, mean_eps: 0.996356\n", " 4917/1000000: episode: 7, duration: 283.394s, episode steps: 558, steps per second: 2, episode reward: 220.000, mean reward: 0.394 [0.000, 10.000], mean action: 4.077 [0.000, 8.000], mean observation: 72.862 [0.000, 228.000], loss: 10.461510, mean_absolute_error: 0.241632, acc: 0.119288, mean_q: 0.999966, mean_eps: 0.995826\n", " 5493/1000000: episode: 8, duration: 278.382s, episode steps: 576, steps per second: 2, episode reward: 150.000, mean reward: 0.260 [0.000, 10.000], mean action: 3.880 [0.000, 8.000], mean observation: 72.948 [0.000, 228.000], loss: 6.880867, mean_absolute_error: 0.238429, acc: 0.113878, mean_q: 0.986947, mean_eps: 0.995316\n", " 6099/1000000: episode: 9, duration: 302.298s, episode steps: 606, steps per second: 2, episode reward: 150.000, mean reward: 0.248 [0.000, 10.000], mean action: 4.140 [0.000, 8.000], mean observation: 72.914 [0.000, 228.000], loss: 4.512041, mean_absolute_error: 0.235195, acc: 0.114377, mean_q: 1.000000, mean_eps: 0.994784\n", " 6743/1000000: episode: 10, duration: 338.222s, episode steps: 644, steps per second: 2, episode reward: 190.000, mean reward: 0.295 [0.000, 10.000], mean action: 4.057 [0.000, 8.000], mean observation: 72.852 [0.000, 228.000], loss: 7.434086, mean_absolute_error: 0.240786, acc: 0.110540, mean_q: 1.000000, mean_eps: 0.994222\n", " 7332/1000000: episode: 11, duration: 297.293s, episode steps: 589, steps per second: 2, episode reward: 240.000, mean reward: 0.407 [0.000, 10.000], mean action: 4.049 [0.000, 8.000], mean observation: 72.842 [0.000, 228.000], loss: 5.549702, mean_absolute_error: 0.235086, acc: 0.113487, mean_q: 1.000000, mean_eps: 0.993667\n", " 8202/1000000: episode: 12, duration: 449.472s, episode steps: 870, steps per second: 2, episode reward: 270.000, mean reward: 0.310 [0.000, 10.000], mean action: 3.962 [0.000, 8.000], mean observation: 72.817 [0.000, 228.000], loss: 6.081496, mean_absolute_error: 0.237461, acc: 0.110524, mean_q: 1.000000, mean_eps: 0.993010\n", " 9120/1000000: episode: 13, duration: 483.860s, episode steps: 918, steps per second: 2, episode reward: 380.000, mean reward: 0.414 [0.000, 10.000], mean action: 3.974 [0.000, 8.000], mean observation: 72.744 [0.000, 228.000], loss: 7.888651, mean_absolute_error: 0.237701, acc: 0.112337, mean_q: 1.000000, mean_eps: 0.992206\n", " 9915/1000000: episode: 14, duration: 415.255s, episode steps: 795, steps per second: 2, episode reward: 250.000, mean reward: 0.314 [0.000, 10.000], mean action: 4.072 [0.000, 8.000], mean observation: 72.777 [0.000, 228.000], loss: 4.046062, mean_absolute_error: 0.236618, acc: 0.108491, mean_q: 1.000000, mean_eps: 0.991435\n", " 10512/1000000: episode: 15, duration: 318.684s, episode steps: 597, steps per second: 2, episode reward: 200.000, mean reward: 0.335 [0.000, 10.000], mean action: 4.065 [0.000, 8.000], mean observation: 72.892 [0.000, 228.000], loss: 4.564064, mean_absolute_error: 0.235717, acc: 0.114374, mean_q: 1.000000, mean_eps: 0.990808\n", " 10987/1000000: episode: 16, duration: 251.281s, episode steps: 475, steps per second: 2, episode reward: 160.000, mean reward: 0.337 [0.000, 10.000], mean action: 3.882 [0.000, 8.000], mean observation: 72.839 [0.000, 228.000], loss: 7.823420, mean_absolute_error: 0.240771, acc: 0.113355, mean_q: 1.000000, mean_eps: 0.990326\n", " 11657/1000000: episode: 17, duration: 350.044s, episode steps: 670, steps per second: 2, episode reward: 170.000, mean reward: 0.254 [0.000, 10.000], mean action: 4.028 [0.000, 8.000], mean observation: 72.900 [0.000, 228.000], loss: 5.269891, mean_absolute_error: 0.234422, acc: 0.116698, mean_q: 1.000000, mean_eps: 0.989811\n", " 12244/1000000: episode: 18, duration: 307.721s, episode steps: 587, steps per second: 2, episode reward: 140.000, mean reward: 0.239 [0.000, 10.000], mean action: 3.951 [0.000, 8.000], mean observation: 72.859 [0.000, 228.000], loss: 3.343591, mean_absolute_error: 0.232368, acc: 0.111584, mean_q: 1.000000, mean_eps: 0.989245\n", " 12849/1000000: episode: 19, duration: 314.754s, episode steps: 605, steps per second: 2, episode reward: 140.000, mean reward: 0.231 [0.000, 10.000], mean action: 4.187 [0.000, 8.000], mean observation: 72.924 [0.000, 228.000], loss: 5.313445, mean_absolute_error: 0.232026, acc: 0.118802, mean_q: 1.000000, mean_eps: 0.988709\n", " 13479/1000000: episode: 20, duration: 325.365s, episode steps: 630, steps per second: 2, episode reward: 200.000, mean reward: 0.317 [0.000, 10.000], mean action: 3.921 [0.000, 8.000], mean observation: 72.911 [0.000, 228.000], loss: 5.557315, mean_absolute_error: 0.235413, acc: 0.116319, mean_q: 1.000000, mean_eps: 0.988153\n" ] } ], "source": [ "dqn2.fit(env, nb_steps=1000000, visualize=True, verbose=2)\n", "dqn2.test(env, nb_episodes=10, visualize=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n", "\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }